Skip to content

Commit 891a00c

Browse files
tswast, gcf-owl-bot[bot], and parthea
authored
fix: use data project for destination in to_gbq (#455)
* fix: use data project for destination in `to_gbq` * bump coverage * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * improve test coverage Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Anthonios Partheniou <partheniou@google.com>
1 parent 4beeb54 commit 891a00c

File tree

3 files changed

+85
-8
lines changed

3 files changed

+85
-8
lines changed

pandas_gbq/gbq.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ def load_data(
549549
schema=None,
550550
progress_bar=True,
551551
api_method: str = "load_parquet",
552+
billing_project: Optional[str] = None,
552553
):
553554
from pandas_gbq import load
554555

@@ -563,6 +564,7 @@ def load_data(
563564
schema=schema,
564565
location=self.location,
565566
api_method=api_method,
567+
billing_project=billing_project,
566568
)
567569
if progress_bar and tqdm:
568570
chunks = tqdm.tqdm(chunks)
@@ -575,8 +577,8 @@ def load_data(
575577
except self.http_error as ex:
576578
self.process_http_error(ex)
577579

578-
def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
579-
table = _Table(self.project_id, dataset_id, credentials=self.credentials)
580+
def delete_and_recreate_table(self, project_id, dataset_id, table_id, table_schema):
581+
table = _Table(project_id, dataset_id, credentials=self.credentials)
580582
table.delete(table_id)
581583
table.create(table_id, table_schema)
582584

@@ -1113,7 +1115,9 @@ def to_gbq(
11131115
"'append' or 'replace' data."
11141116
)
11151117
elif if_exists == "replace":
1116-
connector.delete_and_recreate_table(dataset_id, table_id, table_schema)
1118+
connector.delete_and_recreate_table(
1119+
project_id_table, dataset_id, table_id, table_schema
1120+
)
11171121
else:
11181122
if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema):
11191123
raise InvalidSchema(
@@ -1142,6 +1146,7 @@ def to_gbq(
11421146
schema=table_schema,
11431147
progress_bar=progress_bar,
11441148
api_method=api_method,
1149+
billing_project=project_id,
11451150
)
11461151

11471152

pandas_gbq/load.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ def load_parquet(
114114
destination_table_ref: bigquery.TableReference,
115115
location: Optional[str],
116116
schema: Optional[Dict[str, Any]],
117+
billing_project: Optional[str] = None,
117118
):
118119
job_config = bigquery.LoadJobConfig()
119120
job_config.write_disposition = "WRITE_APPEND"
@@ -126,7 +127,11 @@ def load_parquet(
126127

127128
try:
128129
client.load_table_from_dataframe(
129-
dataframe, destination_table_ref, job_config=job_config, location=location,
130+
dataframe,
131+
destination_table_ref,
132+
job_config=job_config,
133+
location=location,
134+
project=billing_project,
130135
).result()
131136
except pyarrow.lib.ArrowInvalid as exc:
132137
raise exceptions.ConversionError(
@@ -162,6 +167,7 @@ def load_csv_from_dataframe(
162167
location: Optional[str],
163168
chunksize: Optional[int],
164169
schema: Optional[Dict[str, Any]],
170+
billing_project: Optional[str] = None,
165171
):
166172
bq_schema = None
167173

@@ -171,7 +177,11 @@ def load_csv_from_dataframe(
171177

172178
def load_chunk(chunk, job_config):
173179
client.load_table_from_dataframe(
174-
chunk, destination_table_ref, job_config=job_config, location=location,
180+
chunk,
181+
destination_table_ref,
182+
job_config=job_config,
183+
location=location,
184+
project=billing_project,
175185
).result()
176186

177187
return load_csv(dataframe, chunksize, bq_schema, load_chunk)
@@ -184,6 +194,7 @@ def load_csv_from_file(
184194
location: Optional[str],
185195
chunksize: Optional[int],
186196
schema: Optional[Dict[str, Any]],
197+
billing_project: Optional[str] = None,
187198
):
188199
"""Manually encode a DataFrame to CSV and use the buffer in a load job.
189200
@@ -204,6 +215,7 @@ def load_chunk(chunk, job_config):
204215
destination_table_ref,
205216
job_config=job_config,
206217
location=location,
218+
project=billing_project,
207219
).result()
208220
finally:
209221
chunk_buffer.close()
@@ -219,19 +231,39 @@ def load_chunks(
219231
schema=None,
220232
location=None,
221233
api_method="load_parquet",
234+
billing_project: Optional[str] = None,
222235
):
223236
if api_method == "load_parquet":
224-
load_parquet(client, dataframe, destination_table_ref, location, schema)
237+
load_parquet(
238+
client,
239+
dataframe,
240+
destination_table_ref,
241+
location,
242+
schema,
243+
billing_project=billing_project,
244+
)
225245
# TODO: yield progress depending on result() with timeout
226246
return [0]
227247
elif api_method == "load_csv":
228248
if FEATURES.bigquery_has_from_dataframe_with_csv:
229249
return load_csv_from_dataframe(
230-
client, dataframe, destination_table_ref, location, chunksize, schema
250+
client,
251+
dataframe,
252+
destination_table_ref,
253+
location,
254+
chunksize,
255+
schema,
256+
billing_project=billing_project,
231257
)
232258
else:
233259
return load_csv_from_file(
234-
client, dataframe, destination_table_ref, location, chunksize, schema
260+
client,
261+
dataframe,
262+
destination_table_ref,
263+
location,
264+
chunksize,
265+
schema,
266+
billing_project=billing_project,
235267
)
236268
else:
237269
raise ValueError(

tests/unit/test_to_gbq.py

+40
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,46 @@ def test_to_gbq_with_if_exists_replace(mock_bigquery_client):
131131
assert mock_bigquery_client.create_table.called
132132

133133

134+
def test_to_gbq_with_if_exists_replace_cross_project(
135+
mock_bigquery_client, expected_load_method
136+
):
137+
mock_bigquery_client.get_table.side_effect = (
138+
# Initial check
139+
google.cloud.bigquery.Table("data-project.my_dataset.my_table"),
140+
# Recreate check
141+
google.api_core.exceptions.NotFound("my_table"),
142+
)
143+
gbq.to_gbq(
144+
DataFrame([[1]]),
145+
"data-project.my_dataset.my_table",
146+
project_id="billing-project",
147+
if_exists="replace",
148+
)
149+
# TODO: We can avoid these API calls by using write disposition in the load
150+
# job. See: https://github.com/googleapis/python-bigquery-pandas/issues/118
151+
assert mock_bigquery_client.delete_table.called
152+
args, _ = mock_bigquery_client.delete_table.call_args
153+
table_delete: google.cloud.bigquery.TableReference = args[0]
154+
assert table_delete.project == "data-project"
155+
assert table_delete.dataset_id == "my_dataset"
156+
assert table_delete.table_id == "my_table"
157+
assert mock_bigquery_client.create_table.called
158+
args, _ = mock_bigquery_client.create_table.call_args
159+
table_create: google.cloud.bigquery.TableReference = args[0]
160+
assert table_create.project == "data-project"
161+
assert table_create.dataset_id == "my_dataset"
162+
assert table_create.table_id == "my_table"
163+
164+
# Check that billing project and destination table is set correctly.
165+
expected_load_method.assert_called_once()
166+
load_args, load_kwargs = expected_load_method.call_args
167+
table_destination = load_args[1]
168+
assert table_destination.project == "data-project"
169+
assert table_destination.dataset_id == "my_dataset"
170+
assert table_destination.table_id == "my_table"
171+
assert load_kwargs["project"] == "billing-project"
172+
173+
134174
def test_to_gbq_with_if_exists_unknown():
135175
with pytest.raises(ValueError):
136176
gbq.to_gbq(

0 commit comments

Comments (0)