Skip to content

Commit bfff3d9

Browse files
scalar_in_linter() for x %in% 1 (#2084)
* New scalar_in_linter() * NEWS * specific TODO * Update test-scalar_in_linter.R * progress customizing for NA * correct tests * revert most changes related to NA (just skip linting) * tests for NA * tweak comment --------- Co-authored-by: AshesITR <alexander.rosenstock@web.de>
1 parent 35f3344 commit bfff3d9

12 files changed

+114
-4
lines changed

DESCRIPTION

+1
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ Collate:
150150
'redundant_ifelse_linter.R'
151151
'regex_subset_linter.R'
152152
'routine_registration_linter.R'
153+
'scalar_in_linter.R'
153154
'semicolon_linter.R'
154155
'seq_linter.R'
155156
'settings.R'

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ export(redundant_ifelse_linter)
115115
export(regex_subset_linter)
116116
export(routine_registration_linter)
117117
export(sarif_output)
118+
export(scalar_in_linter)
118119
export(semicolon_linter)
119120
export(semicolon_terminator_linter)
120121
export(seq_linter)

NEWS.md

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
* `library_call_linter()` can detect if all library/require calls are not at the top of your script (#2027 and #2043, @nicholas-masel and @MichaelChirico).
4040
* `keyword_quote_linter()` for finding unnecessary or discouraged quoting of symbols in assignment, function arguments, or extraction (part of #884, @MichaelChirico). Quoting is unnecessary when the target is a valid R name, e.g. `c("a" = 1)` can be `c(a = 1)`. The same goes to assignment (`"a" <- 1`) and extraction (`x$"a"`). Where quoting is necessary, the linter encourages doing so with backticks (e.g. `` x$`a b` `` instead of `x$"a b"`).
4141
* `length_levels_linter()` for using the specific function `nlevels()` instead of checking `length(levels(x))` (part of #884, @MichaelChirico).
42+
* `scalar_in_linter()` for discouraging `%in%` when the right-hand side is a scalar, e.g. `x %in% 1` (part of #884, @MichaelChirico).
4243
* `if_not_else_linter()` for encouraging `if` statements to be structured as `if (A) x else y` instead of `if (!A) y else x` (part of #884, @MichaelChirico).
4344
* `commas_linter()` gains an option `allow_trailing` (default `FALSE`) to allow trailing commas while indexing. (#2104, @MEO265)
4445

R/scalar_in_linter.R

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#' Block usage like x %in% "a"
2+
#'
3+
#' `vector %in% set` is appropriate for matching a vector to a set, but if
4+
#' that set has size 1, `==` is more appropriate. `%chin%` from `data.table`
5+
#' is matched as well.
6+
#'
7+
#' `scalar %in% vector` is OK, because the alternative (`any(vector == scalar)`)
8+
#' is more circuitous & potentially less clear.
9+
#'
10+
#' @evalRd rd_tags("scalar_in_linter")
11+
#' @seealso [linters] for a complete list of linters available in lintr.
12+
#' @export
13+
scalar_in_linter <- function() {
14+
# TODO(#2085): Extend to include other cases where the RHS is clearly a scalar
15+
# NB: all of logical, integer, double, hex, complex are parsed as NUM_CONST
16+
xpath <- "
17+
//SPECIAL[text() = '%in%' or text() = '%chin%']
18+
/following-sibling::expr[NUM_CONST[not(starts-with(text(), 'NA'))] or STR_CONST]
19+
/parent::expr
20+
"
21+
22+
return(Linter(function(source_expression) {
23+
if (!is_lint_level(source_expression, "expression")) {
24+
return(list())
25+
}
26+
27+
xml <- source_expression$xml_parsed_content
28+
29+
bad_expr <- xml_find_all(xml, xpath)
30+
in_op <- xml_find_chr(bad_expr, "string(SPECIAL)")
31+
lint_msg <-
32+
paste0("Use == to match length-1 scalars, not ", in_op, ". Note that == preserves NA where ", in_op, " does not.")
33+
34+
xml_nodes_to_lints(
35+
bad_expr,
36+
source_expression = source_expression,
37+
lint_message = lint_msg,
38+
type = "warning"
39+
)
40+
}))
41+
}

inst/lintr/linters.csv

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ redundant_equals_linter,best_practices readability efficiency common_mistakes
7272
redundant_ifelse_linter,best_practices efficiency consistency configurable
7373
regex_subset_linter,best_practices efficiency
7474
routine_registration_linter,best_practices efficiency robustness
75+
scalar_in_linter,readability consistency best_practices efficiency
7576
semicolon_linter,style readability default configurable
7677
semicolon_terminator_linter,style readability deprecated configurable
7778
seq_linter,robustness efficiency consistency best_practices default

man/best_practices_linters.Rd

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/consistency_linters.Rd

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/efficiency_linters.Rd

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/linters.Rd

+5-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/readability_linters.Rd

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/scalar_in_linter.Rd

+23
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
test_that("scalar_in_linter skips allowed usages", {
2+
linter <- scalar_in_linter()
3+
4+
expect_lint("x %in% y", NULL, linter)
5+
expect_lint("y %in% c('a', 'b')", NULL, linter)
6+
expect_lint("c('a', 'b') %chin% x", NULL, linter)
7+
expect_lint("z %in% 1:3", NULL, linter)
8+
# scalars on LHS are fine (often used as `"col" %in% names(DF)`)
9+
expect_lint("3L %in% x", NULL, linter)
10+
11+
# this should be is.na(x), but it more directly uses the "always TRUE/FALSE, _not_ NA"
12+
# aspect of %in%, so we delegate linting here to equals_na_linter()
13+
expect_lint("x %in% NA", NULL, linter)
14+
expect_lint("x %in% NA_character_", NULL, linter)
15+
})
16+
17+
test_that("scalar_in_linter blocks simple disallowed usages", {
18+
linter <- scalar_in_linter()
19+
lint_in_msg <- rex::rex("Use == to match length-1 scalars, not %in%.")
20+
lint_chin_msg <- rex::rex("Use == to match length-1 scalars, not %chin%.")
21+
22+
expect_lint("x %in% 1", lint_in_msg, linter)
23+
expect_lint("x %chin% 'a'", lint_chin_msg, linter)
24+
})
25+
26+
test_that("multiple lints are generated correctly", {
27+
linter <- scalar_in_linter()
28+
29+
expect_lint(
30+
trim_some('{
31+
x %in% 1
32+
y %chin% "a"
33+
}'),
34+
list("%in%", "%chin%"),
35+
linter
36+
)
37+
})

0 commit comments

Comments
 (0)