feat: to_gbq uses Parquet by default, use api_method="load_csv" for old behavior #413

Merged · 21 commits · Nov 2, 2021
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
@@ -148,7 +148,7 @@ Running System Tests

.. note::

System tests are only configured to run under Python 3.8 and 3.9.
System tests are only configured to run under Python 3.7, 3.8 and 3.9.
For expediency, we do not run them in older versions of Python 3.

This alone will not run the tests. You'll need to change some local
3 changes: 2 additions & 1 deletion ci/requirements-3.7-0.23.2.conda
@@ -2,8 +2,9 @@ codecov
coverage
fastavro
flake8
numpy==1.14.5
numpy==1.16.6
google-cloud-bigquery==1.11.1
pyarrow==3.0.0
pydata-google-auth
pytest
pytest-cov
1 change: 1 addition & 0 deletions ci/requirements-3.9-NIGHTLY.conda
@@ -1,6 +1,7 @@
pydata-google-auth
google-cloud-bigquery
google-cloud-bigquery-storage
pyarrow
pytest
pytest-cov
codecov
3 changes: 2 additions & 1 deletion docs/install.rst
@@ -29,7 +29,7 @@ Install from Source

.. code-block:: shell

$ pip install git+https://github.com/pydata/pandas-gbq.git
$ pip install git+https://github.com/googleapis/python-bigquery-pandas.git


Dependencies
@@ -38,6 +38,7 @@ Dependencies
This module requires following additional dependencies:

- `pydata-google-auth <https://github.com/pydata/pydata-google-auth>`__: Helpers for authentication to Google's API
- `pyarrow <https://arrow.apache.org/docs/python/>`__: Format for getting data to/from Google BigQuery
- `google-auth <https://github.com/GoogleCloudPlatform/google-auth-library-python>`__: authentication and authorization for Google's API
- `google-auth-oauthlib <https://github.com/GoogleCloudPlatform/google-auth-library-python-oauthlib>`__: integration with `oauthlib <https://github.com/idan/oauthlib>`__ for end-user authentication
- `google-cloud-bigquery <https://googleapis.dev/python/bigquery/latest/index.html>`__: Google Cloud client library for BigQuery
2 changes: 1 addition & 1 deletion noxfile.py
@@ -28,7 +28,7 @@
BLACK_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"]

DEFAULT_PYTHON_VERSION = "3.8"
SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.9"]
SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]

CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()
2 changes: 1 addition & 1 deletion owlbot.py
@@ -31,7 +31,7 @@
extras = ["tqdm"]
templated_files = common.py_library(
unit_test_python_versions=["3.7", "3.8", "3.9"],
system_test_python_versions=["3.8", "3.9"],
system_test_python_versions=["3.7", "3.8", "3.9"],
cov_level=86,
unit_test_extras=extras,
system_test_extras=extras,
15 changes: 12 additions & 3 deletions pandas_gbq/exceptions.py
@@ -3,21 +3,30 @@
# license that can be found in the LICENSE file.


class GenericGBQException(ValueError):
"""
Raised when an unrecognized Google API Error occurs.
"""


class AccessDenied(ValueError):
"""
Raised when invalid credentials are provided, or tokens have expired.
"""

pass

class ConversionError(GenericGBQException):
"""
Raised when there is a problem converting the DataFrame to a format
required to upload it to BigQuery.
"""


class InvalidPrivateKeyFormat(ValueError):
"""
Raised when provided private key has invalid format.
"""

pass


class PerformanceWarning(RuntimeWarning):
"""
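Note that ConversionError subclasses GenericGBQException, so existing handlers for the generic error keep working. A minimal sketch (not part of this diff) of how caller code might surface it; the project and table names below are placeholders:

.. code-block:: python

    # Minimal sketch: catching the new ConversionError around an upload.
    # "my-project" and "my_dataset.my_table" are placeholder names.
    import pandas
    import pandas_gbq
    from pandas_gbq.exceptions import ConversionError, GenericGBQException

    df = pandas.DataFrame({"ts": pandas.to_datetime(["2021-11-02T00:00:00Z"])})

    try:
        pandas_gbq.to_gbq(
            df, "my_dataset.my_table", project_id="my-project", api_method="load_parquet"
        )
    except ConversionError as exc:
        # The DataFrame could not be converted to the upload format (Parquet here).
        print(f"conversion failed: {exc}")
    except GenericGBQException as exc:
        # Any other unrecognized Google API error.
        print(f"load failed: {exc}")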
10 changes: 10 additions & 0 deletions pandas_gbq/features.py
@@ -10,6 +10,7 @@
BIGQUERY_BQSTORAGE_VERSION = "1.24.0"
BIGQUERY_FROM_DATAFRAME_CSV_VERSION = "2.6.0"
PANDAS_VERBOSITY_DEPRECATION_VERSION = "0.23.0"
PANDAS_PARQUET_LOSSLESS_TIMESTAMP_VERSION = "1.1.0"


class Features:
@@ -89,5 +90,14 @@ def pandas_has_deprecated_verbose(self):
)
return self.pandas_installed_version >= pandas_verbosity_deprecation

@property
def pandas_has_parquet_with_lossless_timestamp(self):
import pkg_resources

desired_version = pkg_resources.parse_version(
PANDAS_PARQUET_LOSSLESS_TIMESTAMP_VERSION
)
return self.pandas_installed_version >= desired_version


FEATURES = Features()
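The new property follows the same pattern as the existing feature checks: parse the installed pandas version and compare it against the pinned threshold. A rough standalone sketch of the same comparison (not part of the diff; names are illustrative), assuming pandas is importable:

.. code-block:: python

    # Illustrative standalone version of the check above: pandas 1.1.0+ can
    # round-trip DataFrame timestamps through Parquet without loss.
    import pkg_resources
    import pandas


    def pandas_supports_lossless_parquet_timestamps() -> bool:
        installed = pkg_resources.parse_version(pandas.__version__)
        return installed >= pkg_resources.parse_version("1.1.0")


    if __name__ == "__main__":
        print(pandas_supports_lossless_parquet_timestamps())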
49 changes: 38 additions & 11 deletions pandas_gbq/gbq.py
@@ -18,8 +18,11 @@
bigquery = None
google_exceptions = None

from pandas_gbq.exceptions import AccessDenied
from pandas_gbq.exceptions import PerformanceWarning
from pandas_gbq.exceptions import (
AccessDenied,
GenericGBQException,
PerformanceWarning,
)
from pandas_gbq import features
from pandas_gbq.features import FEATURES
import pandas_gbq.schema
@@ -69,14 +72,6 @@ class DatasetCreationError(ValueError):
pass


class GenericGBQException(ValueError):
"""
Raised when an unrecognized Google API Error occurs.
"""

pass


class InvalidColumnOrder(ValueError):
"""
Raised when the provided column order for output
@@ -520,7 +515,7 @@ def _download_results(
df = rows_iter.to_dataframe(
dtypes=conversion_dtypes,
progress_bar_type=progress_bar_type,
**to_dataframe_kwargs
**to_dataframe_kwargs,
)
except self.http_error as ex:
self.process_http_error(ex)
@@ -541,6 +536,7 @@ def load_data(
chunksize=None,
schema=None,
progress_bar=True,
api_method: str = "load_parquet",
):
from pandas_gbq import load

@@ -554,6 +550,7 @@
chunksize=chunksize,
schema=schema,
location=self.location,
api_method=api_method,
)
if progress_bar and tqdm:
chunks = tqdm.tqdm(chunks)
@@ -876,6 +873,7 @@ def to_gbq(
location=None,
progress_bar=True,
credentials=None,
api_method: str = "default",
verbose=None,
private_key=None,
):
@@ -964,6 +962,12 @@
:class:`google.oauth2.service_account.Credentials` directly.

.. versionadded:: 0.8.0
api_method : str, optional
API method used to upload DataFrame to BigQuery. One of "load_parquet",
"load_csv". Default "load_parquet" if pandas is version 1.1.0+,
otherwise "load_csv".

.. versionadded:: 0.16.0
verbose : bool, deprecated
Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
to adjust verbosity instead
@@ -988,6 +992,28 @@
stacklevel=1,
)

if api_method == "default":
# Avoid using parquet if pandas doesn't support lossless conversions to
# parquet timestamp. See: https://stackoverflow.com/a/69758676/101923
if FEATURES.pandas_has_parquet_with_lossless_timestamp:
api_method = "load_parquet"
else:
api_method = "load_csv"

if chunksize is not None:
if api_method == "load_parquet":
warnings.warn(
"chunksize is ignored when using api_method='load_parquet'",
DeprecationWarning,
stacklevel=2,
)
elif api_method == "load_csv":
warnings.warn(
"chunksize will be ignored when using api_method='load_csv' in a future version of pandas-gbq",
PendingDeprecationWarning,
stacklevel=2,
)

if if_exists not in ("fail", "replace", "append"):
raise ValueError("'{0}' is not valid for if_exists".format(if_exists))

@@ -1071,6 +1097,7 @@ def to_gbq(
chunksize=chunksize,
schema=table_schema,
progress_bar=progress_bar,
api_method=api_method,
)


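Taken together, the gbq.py changes make Parquet the default load path when pandas supports lossless Parquet timestamps (1.1.0+), while the CSV path remains available through the new api_method argument. A hedged usage sketch (dataset, table, and project names are placeholders):

.. code-block:: python

    import pandas
    import pandas_gbq

    df = pandas.DataFrame({"num_col": [1, 2, 3], "str_col": ["a", "b", "c"]})

    # Default: api_method resolves to "load_parquet" on pandas 1.1.0+,
    # and falls back to "load_csv" on older pandas versions.
    pandas_gbq.to_gbq(df, "my_dataset.my_table", project_id="my-project")

    # Opt back into the pre-change behavior explicitly; note that chunksize
    # is ignored by the Parquet path and slated for deprecation on the CSV path.
    pandas_gbq.to_gbq(
        df,
        "my_dataset.my_table",
        project_id="my-project",
        if_exists="append",
        api_method="load_csv",
    )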