diff --git a/.coveragerc b/.coveragerc index 0a3b1cea..d6261761 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,7 +22,7 @@ omit = google/cloud/__init__.py [report] -fail_under = 94 +fail_under = 96 show_missing = True exclude_lines = # Re-enable the standard pragma diff --git a/noxfile.py b/noxfile.py index 5e41983b..1b719448 100644 --- a/noxfile.py +++ b/noxfile.py @@ -95,7 +95,11 @@ def default(session): constraints_path, ) - session.install("-e", ".[tqdm]", "-c", constraints_path) + if session.python == "3.9": + extras = "" + else: + extras = "[tqdm]" + session.install("-e", f".{extras}", "-c", constraints_path) # Run py.test against the unit tests. session.run( @@ -259,7 +263,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=94") + session.run("coverage", "report", "--show-missing", "--fail-under=96") session.run("coverage", "erase") diff --git a/owlbot.py b/owlbot.py index 62c9f3c4..3ec9f49c 100644 --- a/owlbot.py +++ b/owlbot.py @@ -29,12 +29,17 @@ # Add templated files # ---------------------------------------------------------------------------- +extras_by_python = { + # Use a middle version of Python to test when no extras are installed. + "3.9": [] +} extras = ["tqdm"] templated_files = common.py_library( unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"], system_test_python_versions=["3.7", "3.8", "3.9", "3.10"], - cov_level=94, + cov_level=96, unit_test_extras=extras, + unit_test_extras_by_python=extras_by_python, system_test_extras=extras, intersphinx_dependencies={ "pandas": "https://pandas.pydata.org/pandas-docs/stable/", @@ -71,6 +76,11 @@ ["noxfile.py"], "--cov=google", "--cov=pandas_gbq", ) +# Workaround for https://github.com/googleapis/synthtool/issues/1317 +s.replace( + ["noxfile.py"], r'extras = "\[\]"', 'extras = ""', +) + s.replace( ["noxfile.py"], r"@nox.session\(python=DEFAULT_PYTHON_VERSION\)\s+def cover\(session\):", diff --git a/pandas_gbq/features.py b/pandas_gbq/features.py index 77535041..ad20c640 100644 --- a/pandas_gbq/features.py +++ b/pandas_gbq/features.py @@ -5,9 +5,7 @@ """Module for checking dependency versions and supported features.""" # https://github.com/googleapis/python-bigquery/blob/master/CHANGELOG.md -BIGQUERY_MINIMUM_VERSION = "1.11.1" -BIGQUERY_CLIENT_INFO_VERSION = "1.12.0" -BIGQUERY_BQSTORAGE_VERSION = "1.24.0" +BIGQUERY_MINIMUM_VERSION = "1.27.2" BIGQUERY_ACCURATE_TIMESTAMP_VERSION = "2.6.0" BIGQUERY_FROM_DATAFRAME_CSV_VERSION = "2.6.0" BIGQUERY_SUPPORTS_BIGNUMERIC_VERSION = "2.10.0" @@ -52,15 +50,6 @@ def bigquery_has_accurate_timestamp(self): min_version = pkg_resources.parse_version(BIGQUERY_ACCURATE_TIMESTAMP_VERSION) return self.bigquery_installed_version >= min_version - @property - def bigquery_has_client_info(self): - import pkg_resources - - bigquery_client_info_version = pkg_resources.parse_version( - BIGQUERY_CLIENT_INFO_VERSION - ) - return self.bigquery_installed_version >= bigquery_client_info_version - @property def bigquery_has_bignumeric(self): import pkg_resources @@ -68,15 +57,6 @@ def bigquery_has_bignumeric(self): min_version = pkg_resources.parse_version(BIGQUERY_SUPPORTS_BIGNUMERIC_VERSION) return self.bigquery_installed_version >= min_version - @property - def bigquery_has_bqstorage(self): - import pkg_resources - - bigquery_bqstorage_version = pkg_resources.parse_version( - BIGQUERY_BQSTORAGE_VERSION - ) - return self.bigquery_installed_version >= bigquery_bqstorage_version - @property def bigquery_has_from_dataframe_with_csv(self): import pkg_resources diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index feca5e2a..0edac95d 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -18,21 +18,10 @@ if typing.TYPE_CHECKING: # pragma: NO COVER import pandas -# Required dependencies, but treat as optional so that _test_google_api_imports -# can provide a better error message. -try: - from google.api_core import exceptions as google_exceptions - from google.cloud import bigquery -except ImportError: # pragma: NO COVER - bigquery = None - google_exceptions = None - from pandas_gbq.exceptions import ( AccessDenied, GenericGBQException, - PerformanceWarning, ) -from pandas_gbq import features from pandas_gbq.features import FEATURES import pandas_gbq.schema import pandas_gbq.timestamp @@ -48,32 +37,32 @@ def _test_google_api_imports(): try: import pkg_resources # noqa - except ImportError as ex: + except ImportError as ex: # pragma: NO COVER raise ImportError("pandas-gbq requires setuptools") from ex try: import db_dtypes # noqa - except ImportError as ex: + except ImportError as ex: # pragma: NO COVER raise ImportError("pandas-gbq requires db-dtypes") from ex try: import pydata_google_auth # noqa - except ImportError as ex: + except ImportError as ex: # pragma: NO COVER raise ImportError("pandas-gbq requires pydata-google-auth") from ex try: from google_auth_oauthlib.flow import InstalledAppFlow # noqa - except ImportError as ex: + except ImportError as ex: # pragma: NO COVER raise ImportError("pandas-gbq requires google-auth-oauthlib") from ex try: import google.auth # noqa - except ImportError as ex: + except ImportError as ex: # pragma: NO COVER raise ImportError("pandas-gbq requires google-auth") from ex try: from google.cloud import bigquery # noqa - except ImportError as ex: + except ImportError as ex: # pragma: NO COVER raise ImportError("pandas-gbq requires google-cloud-bigquery") from ex @@ -372,23 +361,17 @@ def sizeof_fmt(num, suffix="B"): def get_client(self): import google.api_core.client_info + from google.cloud import bigquery import pandas client_info = google.api_core.client_info.ClientInfo( user_agent="pandas-{}".format(pandas.__version__) ) - - # In addition to new enough version of google-api-core, a new enough - # version of google-cloud-bigquery is required to populate the - # client_info. - if FEATURES.bigquery_has_client_info: - return bigquery.Client( - project=self.project_id, - credentials=self.credentials, - client_info=client_info, - ) - - return bigquery.Client(project=self.project_id, credentials=self.credentials) + return bigquery.Client( + project=self.project_id, + credentials=self.credentials, + client_info=client_info, + ) @staticmethod def process_http_error(ex): @@ -404,6 +387,8 @@ def download_table( progress_bar_type: Optional[str] = None, dtypes: Optional[Dict[str, Union[str, Any]]] = None, ) -> "pandas.DataFrame": + from google.cloud import bigquery + self._start_timer() try: @@ -424,6 +409,7 @@ def download_table( def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError + from google.cloud import bigquery job_config = { "query": { @@ -529,27 +515,11 @@ def _download_results( if user_dtypes is None: user_dtypes = {} - if self.use_bqstorage_api and not FEATURES.bigquery_has_bqstorage: - warnings.warn( - ( - "use_bqstorage_api was set, but have google-cloud-bigquery " - "version {}. Requires google-cloud-bigquery version " - "{} or later." - ).format( - FEATURES.bigquery_installed_version, - features.BIGQUERY_BQSTORAGE_VERSION, - ), - PerformanceWarning, - stacklevel=4, - ) - create_bqstorage_client = self.use_bqstorage_api if max_results is not None: create_bqstorage_client = False to_dataframe_kwargs = {} - if FEATURES.bigquery_has_bqstorage: - to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client if FEATURES.bigquery_needs_date_as_object: to_dataframe_kwargs["date_as_object"] = True @@ -560,6 +530,7 @@ def _download_results( df = rows_iter.to_dataframe( dtypes=conversion_dtypes, progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, **to_dataframe_kwargs, ) except self.http_error as ex: @@ -1051,6 +1022,9 @@ def to_gbq( _test_google_api_imports() + from google.api_core import exceptions as google_exceptions + from google.cloud import bigquery + if verbose is not None and FEATURES.pandas_has_deprecated_verbose: warnings.warn( "verbose is deprecated and will be removed in " diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index ec588a3e..214b1f74 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -500,21 +500,6 @@ def test_timeout_configuration(self, project_id): configuration=config, ) - def test_query_response_bytes(self): - assert self.gbq_connector.sizeof_fmt(999) == "999.0 B" - assert self.gbq_connector.sizeof_fmt(1024) == "1.0 KB" - assert self.gbq_connector.sizeof_fmt(1099) == "1.1 KB" - assert self.gbq_connector.sizeof_fmt(1044480) == "1020.0 KB" - assert self.gbq_connector.sizeof_fmt(1048576) == "1.0 MB" - assert self.gbq_connector.sizeof_fmt(1048576000) == "1000.0 MB" - assert self.gbq_connector.sizeof_fmt(1073741824) == "1.0 GB" - assert self.gbq_connector.sizeof_fmt(1.099512e12) == "1.0 TB" - assert self.gbq_connector.sizeof_fmt(1.125900e15) == "1.0 PB" - assert self.gbq_connector.sizeof_fmt(1.152922e18) == "1.0 EB" - assert self.gbq_connector.sizeof_fmt(1.180592e21) == "1.0 ZB" - assert self.gbq_connector.sizeof_fmt(1.208926e24) == "1.0 YB" - assert self.gbq_connector.sizeof_fmt(1.208926e28) == "10000.0 YB" - def test_struct(self, project_id): query = """SELECT 1 int_field, STRUCT("a" as letter, 1 as num) struct_field""" diff --git a/tests/unit/test_features.py b/tests/unit/test_features.py index c810104f..bfe2ea9b 100644 --- a/tests/unit/test_features.py +++ b/tests/unit/test_features.py @@ -16,8 +16,8 @@ def fresh_bigquery_version(monkeypatch): @pytest.mark.parametrize( ["bigquery_version", "expected"], [ - ("1.11.1", False), - ("1.26.0", False), + ("1.27.2", False), + ("1.99.100", False), ("2.5.4", False), ("2.6.0", True), ("2.6.1", True), @@ -34,8 +34,8 @@ def test_bigquery_has_accurate_timestamp(monkeypatch, bigquery_version, expected @pytest.mark.parametrize( ["bigquery_version", "expected"], [ - ("1.11.1", False), - ("1.26.0", False), + ("1.27.2", False), + ("1.99.100", False), ("2.9.999", False), ("2.10.0", True), ("2.12.0", True), @@ -52,8 +52,8 @@ def test_bigquery_has_bignumeric(monkeypatch, bigquery_version, expected): @pytest.mark.parametrize( ["bigquery_version", "expected"], [ - ("1.11.1", False), - ("1.26.0", False), + ("1.27.2", False), + ("1.99.100", False), ("2.5.4", False), ("2.6.0", True), ("2.6.1", True), @@ -69,7 +69,13 @@ def test_bigquery_has_from_dataframe_with_csv(monkeypatch, bigquery_version, exp @pytest.mark.parametrize( ["bigquery_version", "expected"], - [("1.26.0", True), ("2.12.0", True), ("3.0.0", False), ("3.1.0", False)], + [ + ("1.27.2", True), + ("1.99.100", True), + ("2.12.0", True), + ("3.0.0", False), + ("3.1.0", False), + ], ) def test_bigquery_needs_date_as_object(monkeypatch, bigquery_version, expected): import google.cloud.bigquery diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 9748595f..74bec5ed 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -109,25 +109,8 @@ def test__is_query(query_or_table, expected): assert result == expected -def test_GbqConnector_get_client_w_old_bq(monkeypatch, mock_bigquery_client): - gbq._test_google_api_imports() - connector = _make_connector() - monkeypatch.setattr( - type(FEATURES), - "bigquery_has_client_info", - mock.PropertyMock(return_value=False), - ) - - connector.get_client() - - # No client_info argument. - mock_bigquery_client.assert_called_with(credentials=mock.ANY, project=mock.ANY) - - def test_GbqConnector_get_client_w_new_bq(mock_bigquery_client): gbq._test_google_api_imports() - if not FEATURES.bigquery_has_client_info: - pytest.skip("google-cloud-bigquery missing client_info feature") pytest.importorskip("google.api_core.client_info") connector = _make_connector() @@ -606,9 +589,6 @@ def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_crede def test_read_gbq_use_bqstorage_api( mock_bigquery_client, mock_service_account_credentials ): - if not FEATURES.bigquery_has_bqstorage: # pragma: NO COVER - pytest.skip("requires BigQuery Storage API") - mock_service_account_credentials.project_id = "service_account_project_id" df = gbq.read_gbq( "SELECT 1 AS int_col", @@ -716,3 +696,25 @@ def test_read_gbq_with_list_rows_error_translates_exception( "my-project.my_dataset.read_gbq_table", credentials=mock_service_account_credentials, ) + + +@pytest.mark.parametrize( + ["size_in_bytes", "formatted_text"], + [ + (999, "999.0 B"), + (1024, "1.0 KB"), + (1099, "1.1 KB"), + (1044480, "1020.0 KB"), + (1048576, "1.0 MB"), + (1048576000, "1000.0 MB"), + (1073741824, "1.0 GB"), + (1.099512e12, "1.0 TB"), + (1.125900e15, "1.0 PB"), + (1.152922e18, "1.0 EB"), + (1.180592e21, "1.0 ZB"), + (1.208926e24, "1.0 YB"), + (1.208926e28, "10000.0 YB"), + ], +) +def test_query_response_bytes(size_in_bytes, formatted_text): + assert gbq.GbqConnector.sizeof_fmt(size_in_bytes) == formatted_text diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index e488bdb5..22c542f1 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -49,6 +49,25 @@ def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") +def test_to_gbq_load_method_translates_exception( + mock_bigquery_client, expected_load_method +): + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + expected_load_method.side_effect = google.api_core.exceptions.InternalServerError( + "error loading data" + ) + + with pytest.raises(gbq.GenericGBQException): + gbq.to_gbq( + DataFrame({"int_cole": [1, 2, 3]}), + "my_dataset.my_table", + project_id="myproj", + ) + expected_load_method.assert_called_once() + + def test_to_gbq_with_if_exists_append(mock_bigquery_client, expected_load_method): from google.cloud.bigquery import SchemaField