
BUG: Add support to replace partitions in date-partitioned tables (#43) #124


Closed · wants to merge 5 commits
41 changes: 26 additions & 15 deletions pandas_gbq/gbq.py
@@ -337,10 +337,6 @@ def get_user_account_credentials(self):
         This method authenticates using user credentials, either loading saved
         credentials from a file or by going through the OAuth flow.

-        Parameters
-        ----------
-        None
-
         Returns
         -------
         GoogleCredentials : credentials
@@ -567,7 +563,7 @@ def load_data(
         try:
             for remaining_rows in _load.load_chunks(
                     self.client, dataframe, dataset_id, table_id,
-                    chunksize=chunksize):
+                    chunksize=chunksize, schema=schema):
                 self._print("\rLoad is {0}% Complete".format(
                     ((total_rows - remaining_rows) * 100) / total_rows))
         except self.http_error as ex:
@@ -699,8 +695,9 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
         table = _Table(self.project_id, dataset_id,
                        private_key=self.private_key)
         table.delete(table_id)
Collaborator:

Does this mean that BigQuery lets you delete just a single partition using the table delete operation? I haven't tried that.

Contributor (author):

I tried it with the decorated table name and it worked as intended.
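For readers following the thread: a "decorated" table name is the table id plus a $YYYYMMDD partition suffix, and BigQuery's tables.delete on such a name drops only that partition. A minimal sketch of the operation described above, assuming a hypothetical project, dataset, and table:

    from google.cloud import bigquery

    client = bigquery.Client(project='my-project')  # hypothetical project id
    # 'events$20180315' targets only the 2018-03-15 partition of 'events'.
    partition_ref = client.dataset('my_dataset').table('events$20180315')
    client.delete_table(partition_ref)  # removes just that partition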

-        table.create(table_id, table_schema)
-        sleep(delay)
+        if not table.is_date_partitioned(table_id):
+            table.create(table_id, table_schema)
+            sleep(delay)


 def _get_credentials_file():
@@ -1007,6 +1004,11 @@ def _generate_bq_schema(df, default_type='STRING'):

 class _Table(GbqConnector):

+    partition_decorator = '$'
+
+    def is_date_partitioned(self, table_id):
+        return self.partition_decorator in table_id
+
     def __init__(self, project_id, dataset_id, reauth=False, verbose=False,
                  private_key=None):
         self.dataset_id = dataset_id
@@ -1017,7 +1019,7 @@ def exists(self, table_id):

         Parameters
         ----------
-        table : str
+        table_id : str
             Name of table to be verified

         Returns
@@ -1028,24 +1030,30 @@
         from google.api_core.exceptions import NotFound

         table_ref = self.client.dataset(self.dataset_id).table(table_id)
+
         try:
-            self.client.get_table(table_ref)
+            table = self.client.get_table(table_ref)
+            if self.is_date_partitioned(table_id):
+                return table.num_rows > 0
+
             return True
         except NotFound:
             return False
         except self.http_error as ex:
             self.process_http_error(ex)

-    def create(self, table_id, schema):
+    def create(self, table_id, schema, date_partitioned=False):
Collaborator:

Rather than pass in date_partitioned here, I'd like to see an argument similar to the configuration argument in read_gbq which can take a dictionary in the JSON API format. That way options like timePartitioning can be set.

Contributor (author):

That makes sense to me! I'll look into including the configuration in the to_gbq call to enable this and the rest of the API configuration.
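To illustrate the suggestion (this interface is not part of the PR), a configuration dictionary in the JSON API format for a load job might look like the following; the keys mirror BigQuery's jobs.insert resource, and the values shown are only examples:

    # Hypothetical shape for a to_gbq(..., configuration=...) argument,
    # analogous to read_gbq's configuration parameter.
    configuration = {
        'load': {
            'timePartitioning': {'type': 'DAY'},
            'writeDisposition': 'WRITE_APPEND',
        }
    }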

""" Create a table in Google BigQuery given a table and schema

Parameters
----------
table : str
table_id : str
Name of table to be written
schema : str
Use the generate_bq_schema to generate your table schema from a
dataframe.
date_partitioned: boolean
Whether table is to be created as a date partitioned table.
"""
from google.cloud.bigquery import SchemaField
from google.cloud.bigquery import Table
@@ -1062,6 +1070,9 @@ def create(self, table_id, schema):
         table_ref = self.client.dataset(self.dataset_id).table(table_id)
         table = Table(table_ref)

+        if date_partitioned or self.is_date_partitioned(table_id):
+            table.partitioning_type = 'DAY'
+
         # Manually create the schema objects, adding NULLABLE mode
         # as a workaround for
         # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456
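For context, setting partitioning_type = 'DAY' requests ingestion-time (daily) partitioning in the google-cloud-bigquery client used here. A standalone sketch of creating such a table, with a hypothetical project, dataset, and schema:

    from google.cloud import bigquery
    from google.cloud.bigquery import SchemaField, Table

    client = bigquery.Client(project='my-project')  # hypothetical project id
    table_ref = client.dataset('my_dataset').table('events')
    table = Table(table_ref)
    table.schema = [SchemaField('ts', 'TIMESTAMP'),
                    SchemaField('value', 'FLOAT')]
    table.partitioning_type = 'DAY'  # ingestion-time daily partitioning
    client.create_table(table)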
@@ -1084,7 +1095,7 @@ def delete(self, table_id):

         Parameters
         ----------
-        table : str
+        table_id : str
             Name of table to be deleted
         """
         from google.api_core.exceptions import NotFound
@@ -1163,7 +1174,7 @@ def create(self, dataset_id):

         Parameters
         ----------
-        dataset : str
+        dataset_id : str
             Name of dataset to be written
         """
         from google.cloud.bigquery import Dataset
@@ -1184,7 +1195,7 @@ def delete(self, dataset_id):

         Parameters
         ----------
-        dataset : str
+        dataset_id : str
             Name of dataset to be deleted
         """
         from google.api_core.exceptions import NotFound
@@ -1207,7 +1218,7 @@ def tables(self, dataset_id):

         Parameters
         ----------
-        dataset : str
+        dataset_id : str
             Name of dataset to list tables for

         Returns