-
Notifications
You must be signed in to change notification settings - Fork 186
/
Copy pathextract.R
166 lines (144 loc) · 6.14 KB
/
extract.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# content is the file content from readLines
extract_r_source <- function(filename, lines, error = identity) {
pattern <- get_knitr_pattern(filename, lines)
if (is.null(pattern$chunk.begin) || is.null(pattern$chunk.end)) {
return(lines)
}
# mask non-source lines by NA, but keep total line count identical so the line number for EOF is correct, see #1400
output <- rep.int(NA_character_, length(lines))
chunks <- tryCatch(get_chunk_positions(pattern = pattern, lines = lines), error = error)
if (is_error(chunks) || is_lint(chunks)) {
assign("e", chunks, envir = parent.frame())
# error, so return empty code
return(output)
}
# no chunks found, so just return the lines
if (length(chunks[["starts"]]) == 0L || length(chunks[["ends"]]) == 0L) {
return(output)
}
output_env <- environment() # nolint: object_usage_linter. False positive-ish -- used below.
Map(
function(start, end, indent) {
line_seq <- seq(start + 1L, end - 1L)
chunk_code <- lines[line_seq]
output_env$output[line_seq] <- if (indent > 0L) substr(chunk_code, indent + 1L, nchar(chunk_code)) else chunk_code
},
chunks[["starts"]],
chunks[["ends"]],
chunks[["indents"]]
)
# drop <<chunk>> references, too
is.na(output) <- grep(pattern$ref.chunk, output)
replace_prefix(output, pattern$chunk.code)
}
get_knitr_pattern <- function(filename, lines) {
# Early return if the source code is parseable as plain R code.
# Otherwise, R code containing a line which matches any knitr pattern will be treated as a knitr file.
# See #1406 for details.
if (parsable(lines)) {
return(NULL)
}
# suppressWarnings for #1920. TODO(michaelchirico): this is a bit sloppy -- we ignore
# warnings here because encoding issues are caught later and that code path handles them
# correctly by converting to a lint. It would require some refactoring to get that
# right here as well, but it would avoid the duplication.
pattern <- withCallingHandlers(
("knitr" %:::% "detect_pattern")(lines, tolower(("knitr" %:::% "file_ext")(filename))),
warning = function(cond) {
if (!grepl("invalid UTF-8", conditionMessage(cond), fixed = TRUE)) {
cli_warn(cond) # nocov. No known way to reach here.
}
invokeRestart("muffleWarning")
}
)
if (!is.null(pattern)) {
knitr::all_patterns[[pattern]]
} else {
NULL
}
}
get_chunk_positions <- function(pattern, lines) {
starts <- filter_chunk_start_positions(
starts = grep(pattern$chunk.begin, lines, perl = TRUE),
lines = lines
)
ends <- filter_chunk_end_positions(
starts = starts,
ends = grep(pattern$chunk.end, lines, perl = TRUE)
)
# only keep those blocks that contain at least one line of code
keep <- which(ends - starts > 1L)
starts <- starts[keep]
ends <- ends[keep]
# Check indent on all lines in the chunk to allow for staggered indentation within a chunk;
# set the initial column to the leftmost one within each chunk (including the start+end gates). See tests.
# use 'ws_re' to make clear that we're matching knitr's definition of initial whitespace.
ws_re <- sub("```.*", "", pattern$chunk.begin)
extract_min_chunk_indent <- function(start, end) {
indents <- attr(regexpr(ws_re, lines[start:end], perl = TRUE), "match.length")
min(indents)
}
# NB: min() guarantees length(indents) == length(starts)
indents <- unlist(Map(extract_min_chunk_indent, starts, ends))
list(starts = starts, ends = ends, indents = indents)
}
filter_chunk_start_positions <- function(starts, lines) {
# keep blocks that don't set a knitr engine (and so contain evaluated R code)
drop_idx <- defines_knitr_engine(lines[starts])
starts[!drop_idx]
}
filter_chunk_end_positions <- function(starts, ends) {
# In a valid file, possibly with plain-code-blocks,
# - there should be at least as many ends as starts
# In Rmarkdown and Quarto, unevaluated blocks may open & close with the same ``` pattern
# that defines the end-pattern for an evaluated block
# This returns the first end-position that succeeds each start-position
# starts (1, 3, 5, 7, 11) --> (1, 3, 5, 7, 11)
# ends (2, 4, 6, 8, 9, 10, 12) --> (2, 4, 6, 8, 12) # return this
length_difference <- length(ends) - length(starts)
if (length_difference == 0L && all(ends > starts)) {
return(ends)
}
positions <- sort(c(starts = starts, ends = ends))
code_start_indexes <- grep("starts", names(positions), fixed = TRUE)
code_ends <- positions[pmin(1L + code_start_indexes, length(positions))]
bad_end_indexes <- grep("starts", names(code_ends), fixed = TRUE)
if (length(bad_end_indexes) > 0L) {
bad_start_positions <- positions[code_start_indexes[bad_end_indexes]]
# This error message is formatted like a parse error; don't use {cli}
stop(sprintf( # nolint: undesirable_function_call_linter.
"<rmd>:%1$d:1: Missing chunk end for chunk (maybe starting at line %1$d).\n",
bad_start_positions[1L]
), call. = FALSE)
}
code_ends
}
defines_knitr_engine <- function(start_lines) {
# Other packages defining custom engines should have them loaded and thus visible
# via knitr_engines$get() below. It seems the simplest way to accomplish this is
# for those packages to set some code in their .onLoad() hook, but that's not
# always done (nor quite recommended as a "best practice" by knitr).
# See the discussion on #1552.
# TODO(#1617): explore running loadNamespace() automatically.
engines <- names(knitr::knit_engines$get())
# {some_engine}, {some_engine label, ...} or {some_engine, ...}
bare_engine_pattern <- rex(
"{", or(engines), one_of("}", " ", ",")
)
# {... engine = "some_engine" ...}
explicit_engine_pattern <- rex(
boundary, "engine", any_spaces, "="
)
re_matches(start_lines, explicit_engine_pattern) |
re_matches(start_lines, bare_engine_pattern)
}
replace_prefix <- function(lines, prefix_pattern) {
if (is.null(prefix_pattern)) {
return(lines)
}
m <- gregexpr(prefix_pattern, lines)
non_na <- !is.na(m)
prefix_lengths <- lapply(regmatches(lines[non_na], m[non_na]), nchar)
regmatches(lines[non_na], m[non_na]) <- lapply(prefix_lengths, strrep, x = " ")
lines
}