Skip to content

ENH: add render_links for Styler.to_html formatting #45058

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering:
- :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`).
- :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`)
- Custom CSS classes can now be directly specified without string replacement (:issue:`43686`)
- Ability to render hyperlinks automatically via a new ``render_links`` formatting keyword argument (:issue:`45058`)

There are also some LaTeX specific enhancements:

Expand Down
35 changes: 33 additions & 2 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,7 @@ def format(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
render_links: bool = False,
) -> StylerRenderer:
r"""
Format the text display value of cells.
Expand Down Expand Up @@ -842,6 +843,12 @@ def format(

.. versionadded:: 1.3.0

render_links : bool
Convert string patterns containing https://, http://, ftp:// or www. to
HTML <a> tags as clickable URL hyperlinks.

.. versionadded:: 1.4.0

Returns
-------
self : Styler
Expand Down Expand Up @@ -958,6 +965,7 @@ def format(
thousands is None,
na_rep is None,
escape is None,
render_links is False,
)
):
self._display_funcs.clear()
Expand All @@ -980,6 +988,7 @@ def format(
decimal=decimal,
thousands=thousands,
escape=escape,
render_links=render_links,
)
for ri in ris:
self._display_funcs[(ri, ci)] = format_func
Expand All @@ -996,6 +1005,7 @@ def format_index(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
render_links: bool = False,
) -> StylerRenderer:
r"""
Format the text display value of index labels or column headers.
Expand Down Expand Up @@ -1027,6 +1037,9 @@ def format_index(
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.
Escaping is done before ``formatter``.
render_links : bool
Convert string patterns containing https://, http://, ftp:// or www. to
HTML <a> tags as clickable URL hyperlinks.

.. versionadded:: 1.4.0

Returns
-------
Expand Down Expand Up @@ -1128,6 +1141,7 @@ def format_index(
thousands is None,
na_rep is None,
escape is None,
render_links is False,
)
):
display_funcs_.clear()
Expand All @@ -1149,6 +1163,7 @@ def format_index(
decimal=decimal,
thousands=thousands,
escape=escape,
render_links=render_links,
)

for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]:
Expand Down Expand Up @@ -1391,13 +1406,23 @@ def _str_escape(x, escape):
return x


def _render_href(x):
"""uses regex to detect a common URL pattern and converts to HTML <a> tag"""
if isinstance(x, str):
href = '<a href="{0}" target="_blank">{0}</a>'
pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+"
return re.sub(pat, lambda m: href.format(m.group(0)), x)
return x


def _maybe_wrap_formatter(
formatter: BaseFormatter | None = None,
na_rep: str | None = None,
precision: int | None = None,
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
render_links: bool = False,
) -> Callable:
"""
Allows formatters to be expressed as str, callable or None, where None returns
Expand Down Expand Up @@ -1431,11 +1456,17 @@ def _maybe_wrap_formatter(
else:
func_2 = func_1

# Render links
if render_links:
func_3 = lambda x: func_2(_render_href(x))
else:
func_3 = func_2

# Replace missing values if na_rep
if na_rep is None:
return func_2
return func_3
else:
return lambda x: na_rep if isna(x) else func_2(x)
return lambda x: na_rep if isna(x) else func_3(x)


def non_reducing_slice(slice_: Subset):
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/io/formats/style/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,3 +764,33 @@ def test_hiding_index_columns_multiindex_trimming():
)

assert result == expected


@pytest.mark.parametrize("type", ["data", "index"])
@pytest.mark.parametrize(
    "text, exp, found",
    [
        ("no link, just text", False, ""),
        ("subdomain not www: sub.web.com", False, ""),
        ("www subdomain: www.web.com other", True, "www.web.com"),
        ("scheme full structure: http://www.web.com", True, "http://www.web.com"),
        ("scheme no top-level: http://www.web", True, "http://www.web"),
        ("no scheme, no top-level: www.web", False, "www.web"),
        ("https scheme: https://www.web.com", True, "https://www.web.com"),
        ("ftp scheme: ftp://www.web", True, "ftp://www.web"),
        ("subdirectories: www.web.com/directory", True, "www.web.com/directory"),
        ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"),
    ],
)
def test_rendered_links(type, text, exp, found):
    """
    Check link rendering in ``to_html`` output with ``render_links=True``.

    ``text`` is placed either in a data cell or in an index label (selected by
    ``type``). When ``exp`` is True the anchor tag built from ``found`` must
    appear in the output and the raw ``text`` must not; when ``exp`` is False
    the opposite must hold.
    """
    if type != "data":
        # index-label variant: the text under test is the row label
        frame = DataFrame([0], index=[text])
        styler = frame.style.format_index(render_links=True)
    else:
        frame = DataFrame([text])
        styler = frame.style.format(render_links=True)

    anchor = '<a href="{0}" target="_blank">{0}</a>'.format(found)
    html = styler.to_html()

    anchor_present = anchor in html
    raw_text_present = text in html
    assert anchor_present is exp
    # test conversion done when expected and not
    assert raw_text_present is not exp