@@ -556,45 +556,22 @@ def run_query(self, query, **kwargs):
 
         return schema, result_rows
 
-    def load_data(self, dataframe, dataset_id, table_id, chunksize):
-        from google.cloud.bigquery import LoadJobConfig
-        from six import BytesIO
-
-        destination_table = self.client.dataset(dataset_id).table(table_id)
-        job_config = LoadJobConfig()
-        job_config.write_disposition = 'WRITE_APPEND'
-        job_config.source_format = 'NEWLINE_DELIMITED_JSON'
-        rows = []
-        remaining_rows = len(dataframe)
-
-        total_rows = remaining_rows
-        self._print("\n\n")
+    def load_data(
+            self, dataframe, dataset_id, table_id, chunksize=None,
+            schema=None):
+        from pandas_gbq import _load
 
-        for index, row in dataframe.reset_index(drop=True).iterrows():
-            row_json = row.to_json(
-                force_ascii=False, date_unit='s', date_format='iso')
-            rows.append(row_json)
-            remaining_rows -= 1
+        total_rows = len(dataframe)
+        self._print("\n\n")
 
-            if (len(rows) % chunksize == 0) or (remaining_rows == 0):
+        try:
+            for remaining_rows in _load.load_chunks(
+                    self.client, dataframe, dataset_id, table_id,
+                    chunksize=chunksize):
                 self._print("\rLoad is {0}% Complete".format(
                     ((total_rows - remaining_rows) * 100) / total_rows))
-
-                body = '{}\n'.format('\n'.join(rows))
-                if isinstance(body, bytes):
-                    body = body.decode('utf-8')
-                body = body.encode('utf-8')
-                body = BytesIO(body)
-
-                try:
-                    self.client.load_table_from_file(
-                        body,
-                        destination_table,
-                        job_config=job_config).result()
-                except self.http_error as ex:
-                    self.process_http_error(ex)
-
-                rows = []
+        except self.http_error as ex:
+            self.process_http_error(ex)
 
         self._print("\n")
 
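The chunked NDJSON upload logic removed above now lives in a separate `_load` module, which `load_data` consumes as a generator yielding the number of rows still to be uploaded. That module is not part of this diff; the following is a minimal sketch of what `load_chunks` might look like, reconstructed from the removed code (the helper names and chunk arithmetic are assumptions):

    from io import BytesIO

    from google.cloud.bigquery import LoadJobConfig


    def encode_chunk(dataframe):
        # Serialize the chunk as newline-delimited JSON, the format the
        # removed code built row by row with Series.to_json.
        body = dataframe.to_json(orient='records', lines=True,
                                 force_ascii=False, date_unit='s',
                                 date_format='iso')
        return BytesIO(body.encode('utf-8'))


    def load_chunks(client, dataframe, dataset_id, table_id, chunksize=None):
        # Hypothetical reconstruction; the real pandas_gbq._load module is
        # not shown in this diff.
        destination_table = client.dataset(dataset_id).table(table_id)
        job_config = LoadJobConfig()
        job_config.write_disposition = 'WRITE_APPEND'
        job_config.source_format = 'NEWLINE_DELIMITED_JSON'

        total_rows = len(dataframe)
        if chunksize is None:
            chunksize = max(total_rows, 1)  # None loads in a single chunk

        remaining_rows = total_rows
        for start in range(0, total_rows, chunksize):
            chunk = dataframe.iloc[start:start + chunksize]
            client.load_table_from_file(
                encode_chunk(chunk), destination_table,
                job_config=job_config).result()
            remaining_rows -= len(chunk)
            yield remaining_rows  # lets the caller print progress per chunk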
@@ -888,7 +865,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
     return final_df
 
 
-def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
+def to_gbq(dataframe, destination_table, project_id, chunksize=None,
            verbose=True, reauth=False, if_exists='fail', private_key=None,
           auth_local_webserver=False, table_schema=None):
    """Write a DataFrame to a Google BigQuery table.
@@ -922,8 +899,9 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
         Name of table to be written, in the form 'dataset.tablename'
     project_id : str
         Google BigQuery Account project ID.
-    chunksize : int (default 10000)
-        Number of rows to be inserted in each chunk from the dataframe.
+    chunksize : int (default None)
+        Number of rows to be inserted in each chunk from the dataframe. Use
+        ``None`` to load the dataframe in a single chunk.
     verbose : boolean (default True)
         Show percentage complete
     reauth : boolean (default False)
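With the default flipped from 10000 to ``None``, callers now opt in to chunking rather than getting it implicitly. A minimal usage sketch under that new default (the project and table names are placeholders):

    import pandas as pd
    import pandas_gbq

    df = pd.DataFrame({'name': ['alpha', 'beta'], 'value': [1.0, 2.5]})

    # Default: the whole frame goes up as a single load job.
    pandas_gbq.to_gbq(df, 'my_dataset.my_table', project_id='my-project')

    # An explicit chunksize restores the old incremental behaviour.
    pandas_gbq.to_gbq(df, 'my_dataset.my_table', project_id='my-project',
                      chunksize=10000, if_exists='append')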
@@ -985,7 +963,7 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
             raise TableCreationError("Could not create the table because it "
                                      "already exists. "
                                      "Change the if_exists parameter to "
-                                     "append or replace data.")
+                                     "'append' or 'replace' data.")
         elif if_exists == 'replace':
             connector.delete_and_recreate_table(
                 dataset_id, table_id, table_schema)
@@ -999,19 +977,14 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
     else:
         table.create(table_id, table_schema)
 
-    connector.load_data(dataframe, dataset_id, table_id, chunksize)
+    connector.load_data(
+        dataframe, dataset_id, table_id, chunksize=chunksize,
+        schema=table_schema)
 
 
 def generate_bq_schema(df, default_type='STRING'):
-    # deprecation TimeSeries, #11121
-    warnings.warn("generate_bq_schema is deprecated and will be removed in "
-                  "a future version", FutureWarning, stacklevel=2)
-
-    return _generate_bq_schema(df, default_type=default_type)
-
-
-def _generate_bq_schema(df, default_type='STRING'):
-    """ Given a passed df, generate the associated Google BigQuery schema.
+    """DEPRECATED: Given a passed df, generate the associated Google BigQuery
+    schema.
 
     Parameters
     ----------
@@ -1020,23 +993,16 @@ def _generate_bq_schema(df, default_type='STRING'):
         The default big query type in case the type of the column
         does not exist in the schema.
     """
+    # deprecation TimeSeries, #11121
+    warnings.warn("generate_bq_schema is deprecated and will be removed in "
+                  "a future version", FutureWarning, stacklevel=2)
 
-    type_mapping = {
-        'i': 'INTEGER',
-        'b': 'BOOLEAN',
-        'f': 'FLOAT',
-        'O': 'STRING',
-        'S': 'STRING',
-        'U': 'STRING',
-        'M': 'TIMESTAMP'
-    }
+    return _generate_bq_schema(df, default_type=default_type)
 
-    fields = []
-    for column_name, dtype in df.dtypes.iteritems():
-        fields.append({'name': column_name,
-                       'type': type_mapping.get(dtype.kind, default_type)})
 
-    return {'fields': fields}
+def _generate_bq_schema(df, default_type='STRING'):
+    from pandas_gbq import _schema
+    return _schema.generate_bq_schema(df, default_type=default_type)
 
 
 class _Table(GbqConnector):
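`_generate_bq_schema` keeps its signature but now delegates to a new `_schema` module. Presumably the dtype-kind mapping removed above moved there more or less verbatim; a sketch of the relocated helper under that assumption (the `_schema` module itself is not part of this diff):

    def generate_bq_schema(df, default_type='STRING'):
        # numpy dtype.kind codes mapped to BigQuery column types, as in the
        # code removed from gbq.py above.
        type_mapping = {
            'i': 'INTEGER',
            'b': 'BOOLEAN',
            'f': 'FLOAT',
            'O': 'STRING',
            'S': 'STRING',
            'U': 'STRING',
            'M': 'TIMESTAMP',
        }
        fields = []
        for column_name, dtype in df.dtypes.iteritems():
            fields.append({'name': column_name,
                           'type': type_mapping.get(dtype.kind, default_type)})
        return {'fields': fields}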
@@ -1096,6 +1062,9 @@ def create(self, table_id, schema):
         table_ref = self.client.dataset(self.dataset_id).table(table_id)
         table = Table(table_ref)
 
+        # Manually create the schema objects, adding NULLABLE mode
+        # as a workaround for
+        # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456
         for field in schema['fields']:
             if 'mode' not in field:
                 field['mode'] = 'NULLABLE'
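The new comment explains why each field dict is normalized to an explicit NULLABLE mode before schema objects are built. The rest of `create` falls outside this hunk; a hypothetical continuation showing how the normalized dicts might become `SchemaField` objects with the same `google.cloud.bigquery` client API the diff already uses:

    from google.cloud.bigquery import SchemaField

    # Hypothetical: build SchemaField objects from the normalized dicts; an
    # explicit mode sidesteps the upstream issue referenced above.
    table.schema = [
        SchemaField(field['name'], field['type'], mode=field['mode'])
        for field in schema['fields']
    ]

    try:
        self.client.create_table(table)
    except self.http_error as ex:
        self.process_http_error(ex)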