@@ -5,6 +5,13 @@
 
 import numpy as np
 
+try:
+    # The BigQuery Storage API client is an optional dependency. It is only
+    # required when use_bqstorage_api=True.
+    from google.cloud import bigquery_storage_v1beta1
+except ImportError:  # pragma: NO COVER
+    bigquery_storage_v1beta1 = None
+
 from pandas_gbq.exceptions import AccessDenied
 
 logger = logging.getLogger(__name__)
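
(Editorial note: the guarded import leaves ``bigquery_storage_v1beta1`` as a module-level ``None`` sentinel, so the dependency check in ``_make_bqstorage_client`` below can be a plain ``is None`` test rather than a try/except at call time. The optional packages named in that error message can be installed with ``pip install google-cloud-bigquery-storage fastavro``.)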
@@ -302,6 +309,7 @@ def __init__(
         dialect="standard",
         location=None,
         credentials=None,
+        use_bqstorage_api=False,
     ):
         global context
         from google.api_core.exceptions import GoogleAPIError
@@ -352,6 +360,9 @@ def __init__(
         context.project = self.project_id
 
         self.client = self.get_client()
+        self.bqstorage_client = _make_bqstorage_client(
+            use_bqstorage_api, self.credentials
+        )
 
         # BQ Queries costs $5 per TB. First 1 TB per month is free
         # see here for more: https://cloud.google.com/bigquery/pricing
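
(Editorial note: the Storage API client is built once in the constructor and stored on the connector, so repeated ``run_query`` calls reuse a single client. With the default ``use_bqstorage_api=False``, ``self.bqstorage_client`` is simply ``None``.)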
@@ -489,7 +500,9 @@ def run_query(self, query, **kwargs):
 
         schema_fields = [field.to_api_repr() for field in rows_iter.schema]
         nullsafe_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields)
-        df = rows_iter.to_dataframe(dtypes=nullsafe_dtypes)
+        df = rows_iter.to_dataframe(
+            dtypes=nullsafe_dtypes, bqstorage_client=self.bqstorage_client
+        )
 
         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)
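
(Editorial note: passing ``bqstorage_client=None`` is safe here. The ``bqstorage_client`` argument to ``RowIterator.to_dataframe`` in ``google-cloud-bigquery`` is optional, and the iterator falls back to the standard REST download path when no client is supplied, so callers that leave ``use_bqstorage_api`` at its default see no behavior change.)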
@@ -702,6 +715,21 @@ def _cast_empty_df_dtypes(schema_fields, df):
     return df
 
 
+def _make_bqstorage_client(use_bqstorage_api, credentials):
+    if not use_bqstorage_api:
+        return None
+
+    if bigquery_storage_v1beta1 is None:
+        raise ImportError(
+            "Install the google-cloud-bigquery-storage and fastavro packages "
+            "to use the BigQuery Storage API."
+        )
+
+    return bigquery_storage_v1beta1.BigQueryStorageClient(
+        credentials=credentials
+    )
+
+
 def read_gbq(
     query,
     project_id=None,
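
A minimal sketch of how the new factory behaves, assuming default application credentials are available via ``google.auth`` (illustrative only, not part of this change):

    import google.auth

    credentials, _ = google.auth.default()

    # Disabled (the default): no client is created, so the optional
    # dependency is never touched.
    assert _make_bqstorage_client(False, credentials) is None

    # Enabled: returns a BigQueryStorageClient, or raises ImportError with
    # an install hint if the optional packages are missing.
    bqstorage_client = _make_bqstorage_client(True, credentials)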
@@ -713,6 +741,7 @@ def read_gbq(
     location=None,
     configuration=None,
     credentials=None,
+    use_bqstorage_api=False,
     verbose=None,
     private_key=None,
 ):
@@ -790,6 +819,27 @@ def read_gbq(
         :class:`google.oauth2.service_account.Credentials` directly.
 
         .. versionadded:: 0.8.0
+    use_bqstorage_api : bool, default False
+        Use the `BigQuery Storage API
+        <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
+        download query results quickly, but at an increased cost. To use this
+        API, first `enable it in the Cloud Console
+        <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
+        You must also have the `bigquery.readsessions.create
+        <https://cloud.google.com/bigquery/docs/access-control#roles>`__
+        permission on the project you are billing queries to.
+
+        **Note:** Due to a `known issue in the google-cloud-bigquery
+        package
+        <https://github.com/googleapis/google-cloud-python/pull/7633>`__
+        (fixed in version 1.11.0), you must write your query results to a
+        destination table. To do this with ``read_gbq``, supply a
+        ``configuration`` dictionary.
+
+        This feature requires the ``google-cloud-bigquery-storage`` and
+        ``fastavro`` packages.
+
+        .. versionadded:: 0.10.0
     verbose : None, deprecated
         Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
         to adjust verbosity instead
@@ -810,6 +860,27 @@ def read_gbq(
     -------
     df: DataFrame
         DataFrame representing results of query.
+
+    Examples
+    --------
+
+    Use the BigQuery Storage API to fetch results quickly, but at an
+    additional cost. Due to a known issue in the ``google-cloud-bigquery``
+    package (fixed in version 1.11.0), write results to a destination table.
+
+    >>> pandas_gbq.read_gbq(
+    ...     query_string,
+    ...     configuration={
+    ...         'query': {
+    ...             'destinationTable': {
+    ...                 'projectId': 'your-project',
+    ...                 'datasetId': 'destination_dataset',
+    ...                 'tableId': 'new_table_name',
+    ...             }
+    ...         }
+    ...     },
+    ...     use_bqstorage_api=True,
+    ... )
     """
     global context
 
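
(Editorial note on the example above: because results land in a real destination table, they persist after the query finishes. A scratch dataset with a default table expiration is a common way to keep such tables from accumulating.)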
@@ -846,6 +917,7 @@ def read_gbq(
         location=location,
         credentials=credentials,
         private_key=private_key,
+        use_bqstorage_api=use_bqstorage_api,
     )
 
     final_df = connector.run_query(query, configuration=configuration)
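
Putting the pieces together, a hedged end-to-end sketch of the new path (project, dataset, and table names are placeholders; the public dataset is just an example query target):

    import pandas_gbq

    sql = """
    SELECT name, SUM(number) AS total
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    GROUP BY name
    """

    # Writes results to a scratch table (required by the known issue noted
    # above), then downloads them via the BigQuery Storage API.
    df = pandas_gbq.read_gbq(
        sql,
        project_id="your-project",
        configuration={
            "query": {
                "destinationTable": {
                    "projectId": "your-project",
                    "datasetId": "scratch_dataset",
                    "tableId": "names_by_total",
                }
            }
        },
        use_bqstorage_api=True,
    )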