@@ -5,6 +5,13 @@

 import numpy as np

+try:
+    # The BigQuery Storage API client is an optional dependency. It is only
+    # required when use_bqstorage_api=True.
+    from google.cloud import bigquery_storage_v1beta1
+except ImportError:  # pragma: NO COVER
+    bigquery_storage_v1beta1 = None
+
 from pandas_gbq.exceptions import AccessDenied

 logger = logging.getLogger(__name__)
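The hunk above makes the Storage API client an optional dependency. A minimal standalone sketch of that import pattern (the `storage_api_available` helper is illustrative only, not part of this PR):

```python
# Optional-dependency pattern from the hunk above: attempt the import once
# at module load and fall back to a None sentinel on failure.
try:
    from google.cloud import bigquery_storage_v1beta1
except ImportError:
    bigquery_storage_v1beta1 = None


def storage_api_available():
    # Hypothetical helper: True only when google-cloud-bigquery-storage is
    # installed, so callers can gate Storage API code paths on it.
    return bigquery_storage_v1beta1 is not None
```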
@@ -302,6 +309,7 @@ def __init__(
         dialect="standard",
         location=None,
         credentials=None,
+        use_bqstorage_api=False,
     ):
         global context
         from google.api_core.exceptions import GoogleAPIError
@@ -352,6 +360,9 @@ def __init__(
         context.project = self.project_id

         self.client = self.get_client()
+        self.bqstorage_client = _make_bqstorage_client(
+            use_bqstorage_api, self.credentials
+        )

         # BQ Queries costs $5 per TB. First 1 TB per month is free
         # see here for more: https://cloud.google.com/bigquery/pricing
@@ -489,7 +500,9 @@ def run_query(self, query, **kwargs):

         schema_fields = [field.to_api_repr() for field in rows_iter.schema]
         nullsafe_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields)
-        df = rows_iter.to_dataframe(dtypes=nullsafe_dtypes)
+        df = rows_iter.to_dataframe(
+            dtypes=nullsafe_dtypes, bqstorage_client=self.bqstorage_client
+        )

         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)
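For context on the `run_query` change above: `RowIterator.to_dataframe` accepts an optional Storage API client and falls back to the standard REST download when it is `None`, so the attribute can be forwarded unconditionally. A small sketch, assuming default application credentials and the `google-cloud-bigquery` package:

```python
# Sketch of the to_dataframe() pattern used in run_query above. Passing
# bqstorage_client=None makes the call use the regular tabledata.list
# (REST) download path.
from google.cloud import bigquery

bq_client = bigquery.Client()  # assumes default application credentials
rows_iter = bq_client.query("SELECT 1 AS x").result()
df = rows_iter.to_dataframe(bqstorage_client=None)  # None -> REST fallback
```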
@@ -727,6 +740,21 @@ def _localize_df(schema_fields, df):
     return df


+def _make_bqstorage_client(use_bqstorage_api, credentials):
+    if not use_bqstorage_api:
+        return None
+
+    if bigquery_storage_v1beta1 is None:
+        raise ImportError(
+            "Install the google-cloud-bigquery-storage and fastavro packages "
+            "to use the BigQuery Storage API."
+        )
+
+    return bigquery_storage_v1beta1.BigQueryStorageClient(
+        credentials=credentials
+    )
+
+
 def read_gbq(
     query,
     project_id=None,
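A hedged illustration of the new helper's behavior, assuming a pandas-gbq build with this patch is importable; the anonymous credentials are placeholders only:

```python
# Behavior sketch for _make_bqstorage_client (assumes pandas-gbq with this
# patch applied is installed).
from google.auth.credentials import AnonymousCredentials
from pandas_gbq.gbq import _make_bqstorage_client

# The flag gates client creation entirely: False always yields None.
assert _make_bqstorage_client(False, AnonymousCredentials()) is None

# With use_bqstorage_api=True the helper either returns a
# BigQueryStorageClient or raises ImportError naming the packages to install.
```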
@@ -738,6 +766,7 @@ def read_gbq(
     location=None,
     configuration=None,
     credentials=None,
+    use_bqstorage_api=False,
     verbose=None,
     private_key=None,
 ):
@@ -815,6 +844,27 @@ def read_gbq(
        :class:`google.oauth2.service_account.Credentials` directly.

        .. versionadded:: 0.8.0
+    use_bqstorage_api : bool, default False
+        Use the `BigQuery Storage API
+        <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
+        download query results quickly, but at an increased cost. To use this
+        API, first `enable it in the Cloud Console
+        <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
+        You must also have the `bigquery.readsessions.create
+        <https://cloud.google.com/bigquery/docs/access-control#roles>`__
+        permission on the project you are billing queries to.
+
+        **Note:** Due to a `known issue in the ``google-cloud-bigquery``
+        package
+        <https://github.com/googleapis/google-cloud-python/pull/7633>`__
+        (fixed in version 1.11.0), you must write your query results to a
+        destination table. To do this with ``read_gbq``, supply a
+        ``configuration`` dictionary.
+
+        This feature requires the ``google-cloud-bigquery-storage`` and
+        ``fastavro`` packages.
+
+        .. versionadded:: 0.10.0
    verbose : None, deprecated
        Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
        to adjust verbosity instead
@@ -835,6 +885,27 @@ def read_gbq(
    -------
    df: DataFrame
        DataFrame representing results of query.
+
+    Examples
+    --------
+
+    Use the BigQuery Storage API to fetch results quickly, but at an
+    additional cost. Due to a known issue in the BigQuery Storage API, you
+    must write your query results to a destination table.
+
+    >>> pandas_gbq.read_gbq(
+    ...     query_string,
+    ...     configuration={
+    ...         'query': {
+    ...             'destinationTable': {
+    ...                 'projectId': 'your-project',
+    ...                 'datasetId': 'destination_dataset',
+    ...                 'tableId': 'new_table_name',
+    ...             }
+    ...         }
+    ...     },
+    ...     use_bqstorage_api=True,
+    ... )
    """
    global context

@@ -871,6 +942,7 @@ def read_gbq(
        location=location,
        credentials=credentials,
        private_key=private_key,
+        use_bqstorage_api=use_bqstorage_api,
    )

    final_df = connector.run_query(query, configuration=configuration)
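Putting the pieces together, a usage sketch of the new flag (assumes the BigQuery Storage API is enabled on the billing project, the destination dataset already exists, and the `google-cloud-bigquery-storage` and `fastavro` packages are installed; all project, dataset, and table names are placeholders):

```python
import pandas_gbq

# Results must land in a destination table (see the docstring note above)
# until the fix in google-cloud-bigquery 1.11.0.
df = pandas_gbq.read_gbq(
    "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` LIMIT 10",
    project_id="your-project",
    configuration={
        "query": {
            "destinationTable": {
                "projectId": "your-project",
                "datasetId": "destination_dataset",
                "tableId": "new_table_name",
            }
        }
    },
    use_bqstorage_api=True,
)
```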