Skip to content

feat: adds the capability to include custom user agent string #819

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 79 additions & 2 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ def __init__(
auth_redirect_uri=None,
client_id=None,
client_secret=None,
user_agent=None,
rfc9110_delimiter=False,
):
global context
from google.api_core.exceptions import ClientError, GoogleAPIError
Expand All @@ -284,6 +286,8 @@ def __init__(
self.auth_redirect_uri = auth_redirect_uri
self.client_id = client_id
self.client_secret = client_secret
self.user_agent = user_agent
self.rfc9110_delimiter = rfc9110_delimiter

default_project = None

Expand Down Expand Up @@ -337,11 +341,17 @@ def log_elapsed_seconds(self, prefix="Elapsed", postfix="s.", overlong=6):

def get_client(self):
import google.api_core.client_info
import pandas

# import pandas # noqa: F401 # TODO is this line needed here?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just adding this comment about addressing the TODO
is the concern that the import is redundant? was it in here for region tag reasons? (I don't see a region tag 🤷🏻‍♀️ )


bigquery = FEATURES.bigquery_try_import()

user_agent = create_user_agent(
user_agent=self.user_agent, rfc9110_delimiter=self.rfc9110_delimiter
)

client_info = google.api_core.client_info.ClientInfo(
user_agent="pandas-{}".format(pandas.__version__)
user_agent=user_agent,
)
return bigquery.Client(
project=self.project_id,
Expand Down Expand Up @@ -961,6 +971,8 @@ def to_gbq(
auth_redirect_uri=None,
client_id=None,
client_secret=None,
user_agent=None,
rfc9110_delimiter=False,
):
"""Write a DataFrame to a Google BigQuery table.

Expand Down Expand Up @@ -1072,6 +1084,13 @@ def to_gbq(
client_secret : str
The Client Secret associated with the Client ID for the Google Cloud Project
the user is attempting to connect to.
user_agent : str
Custom user agent string used as a prefix to the pandas version.
rfc9110_delimiter : bool
Sets user agent delimiter to a hyphen or a slash.
Default is False, meaning a hyphen will be used.

.. versionadded:: 0.23.3
"""

_test_google_api_imports()
Expand Down Expand Up @@ -1130,6 +1149,8 @@ def to_gbq(
auth_redirect_uri=auth_redirect_uri,
client_id=client_id,
client_secret=client_secret,
user_agent=user_agent,
rfc9110_delimiter=rfc9110_delimiter,
)
bqclient = connector.client

Expand Down Expand Up @@ -1409,3 +1430,59 @@ def create(self, dataset_id):
self.client.create_dataset(dataset)
except self.http_error as ex:
self.process_http_error(ex)


def create_user_agent(
    user_agent: Optional[str] = None, rfc9110_delimiter: bool = False
) -> str:
    """Creates a user agent string.

    The legacy format of the user agent string was: `product-x.y.z` (where x,
    y, and z are the major, minor, and micro version numbers).

    Users are able to prepend this string with their own user agent identifier
    to render something similar to `<my_user_agent> pandas-x.y.z`.

    The legacy format used a hyphen to separate the product from the product
    version which differs slightly from the format recommended by RFC9110, which is:
    `product/x.y.z`. To produce a user agent more in line with the RFC, set
    rfc9110_delimiter to True. This setting does not depend on whether a
    user_agent is also supplied.

    Reference:
        https://www.rfc-editor.org/info/rfc9110

    Args:
        user_agent (Optional[str]): User agent string used as a prefix.
            ``None``, an empty string, or a whitespace-only string all mean
            "no prefix": only the pandas identity is returned.

        rfc9110_delimiter (Optional[bool]): Sets delimiter to a hyphen or a slash.
            Default is False, meaning a hyphen will be used.

    Returns (str):
        Customized user agent string.

    Warns:
        PendingDeprecationWarning: Emitted whenever ``rfc9110_delimiter`` is
            False (the legacy hyphen delimiter is used).

    Deprecation Warning:
        In a future major release, the default delimiter will be changed to
        a `/` in accordance with RFC9110.
    """
    import pandas as pd

    if rfc9110_delimiter:
        delimiter = "/"
    else:
        warnings.warn(
            "In a future major release, the default delimiter will be "
            "changed to a `/` in accordance with RFC9110.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        delimiter = "-"

    identity = f"pandas{delimiter}{pd.__version__}"

    # A blank prefix carries no identifier; joining it with a space would
    # produce a string that starts with whitespace, so fall back to the bare
    # pandas identity exactly as if no prefix had been supplied.
    if user_agent is None or not user_agent.strip():
        return identity

    return f"{user_agent} {identity}"
20 changes: 18 additions & 2 deletions tests/unit/test_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,15 @@ def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn):
mock.PropertyMock(return_value=False),
)
gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard")
assert len(recwarn) == 0
# This test was intended to check for warnings about the deprecation of
# the argument `verbose`, which was removed from gbq (~v0.4.0) and
# pandas (~v0.23.0). (See https://github.com/googleapis/python-bigquery-pandas/pull/158/files)
# This test should not fail upon seeing a warning regarding a pending
# deprecation related to rfc9110 delimiters.
# TODO this and related tests have likely outlived their usefulness,
# consider removing.
for warning in recwarn.list:
assert "delimiter" in str(warning.message)


def test_read_gbq_with_old_bq_raises_importerror(monkeypatch):
Expand All @@ -660,7 +668,15 @@ def test_read_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn):
dialect="standard",
verbose=True,
)
assert len(recwarn) == 0
# This test was intended to check for warnings about the deprecation of
# the argument `verbose`, which was removed from gbq (~v0.4.0) and
# pandas (~v0.23.0). (See https://github.com/googleapis/python-bigquery-pandas/pull/158/files)
# This test should not fail upon seeing a warning regarding a pending
# deprecation related to rfc9110 delimiters.
# TODO this and related tests have likely outlived their usefulness,
# consider removing.
for warning in recwarn.list:
assert "delimiter" in str(warning.message)


def test_read_gbq_with_private_raises_notimplmentederror():
Expand Down
25 changes: 25 additions & 0 deletions tests/unit/test_to_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import google.api_core.exceptions
import google.cloud.bigquery
import pandas as pd
from pandas import DataFrame
import pytest

Expand Down Expand Up @@ -158,3 +159,27 @@ def test_to_gbq_with_if_exists_unknown():
project_id="myproj",
if_exists="unknown",
)


@pytest.mark.parametrize(
    ("user_agent", "rfc9110_delimiter", "expected"),
    [
        # Custom prefix, legacy hyphen delimiter.
        (
            "test_user_agent/2.0.42",
            False,
            "test_user_agent/2.0.42 pandas-{}".format(pd.__version__),
        ),
        # No prefix, legacy hyphen delimiter.
        (None, False, "pandas-{}".format(pd.__version__)),
        # Custom prefix, RFC 9110 slash delimiter.
        (
            "test_user_agent/2.0.42",
            True,
            "test_user_agent/2.0.42 pandas/{}".format(pd.__version__),
        ),
        # No prefix, RFC 9110 slash delimiter.
        (None, True, "pandas/{}".format(pd.__version__)),
    ],
)
def test_create_user_agent(user_agent, rfc9110_delimiter, expected):
    """Check the user agent built for each prefix/delimiter combination."""
    from pandas_gbq.gbq import create_user_agent

    assert create_user_agent(user_agent, rfc9110_delimiter) == expected