Skip to content

Commit 92d2777

Browse files
authored
New data frame (#2994)
Add performant data.frame constructors and use them throughout the code
1 parent a330da3 commit 92d2777

File tree

114 files changed

+418
-359
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

114 files changed

+418
-359
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ Imports:
3333
stats,
3434
tibble,
3535
viridisLite,
36-
withr (>= 2.0.0)
36+
withr (>= 2.0.0),
37+
grDevices
3738
Suggests:
3839
covr,
3940
dplyr,

R/aaa-.r

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,43 @@ NULL
1212
#' @keywords internal
1313
#' @name ggplot2-ggproto
1414
NULL
15+
16+
# Fast data.frame constructor and indexing
17+
# No checking, recycling etc. unless asked for
18+
new_data_frame <- function(x = list(), n = NULL) {
19+
if (length(x) != 0 && is.null(names(x))) stop("Elements must be named", call. = FALSE)
20+
lengths <- vapply(x, length, integer(1))
21+
if (is.null(n)) {
22+
n <- if (length(x) == 0) 0 else max(lengths)
23+
}
24+
for (i in seq_along(x)) {
25+
if (lengths[i] == n) next
26+
if (lengths[i] != 1) stop("Elements must equal the number of rows or 1", call. = FALSE)
27+
x[[i]] <- rep(x[[i]], n)
28+
}
29+
30+
class(x) <- "data.frame"
31+
32+
attr(x, "row.names") <- .set_row_names(n)
33+
x
34+
}
35+
36+
data_frame <- function(...) {
37+
new_data_frame(list(...))
38+
}
39+
40+
data.frame <- function(...) {
41+
stop('Please use `data_frame()` or `new_data_frame()` instead of `data.frame()` for better performance. See the vignette "ggplot2 internal programming guidelines" for details.', call. = FALSE)
42+
}
43+
44+
mat_2_df <- function(x, col_names = colnames(x), .check = FALSE) {
45+
x <- lapply(seq_len(ncol(x)), function(i) x[, i])
46+
if (!is.null(col_names)) names(x) <- col_names
47+
new_data_frame(x)
48+
}
49+
50+
df_col <- function(x, name) .subset2(x, name)
51+
52+
df_rows <- function(x, i) {
53+
new_data_frame(lapply(x, `[`, i = i))
54+
}

R/annotation-custom.r

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ GeomCustomAnn <- ggproto("GeomCustomAnn", Geom,
7474
stop("annotation_custom only works with Cartesian coordinates",
7575
call. = FALSE)
7676
}
77-
corners <- data.frame(x = c(xmin, xmax), y = c(ymin, ymax))
77+
corners <- new_data_frame(list(x = c(xmin, xmax), y = c(ymin, ymax)), n = 2)
7878
data <- coord$transform(corners, panel_params)
7979

8080
x_rng <- range(data$x, na.rm = TRUE)

R/annotation-logticks.r

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ calc_logticks <- function(base = 10, ticks_per_base = base - 1,
238238
longtick_after_base <- floor(ticks_per_base/2)
239239
tickend[ cycleIdx == longtick_after_base ] <- midend
240240

241-
tickdf <- data.frame(value = ticks, start = start, end = tickend)
241+
tickdf <- new_data_frame(list(value = ticks, start = start, end = tickend), n = length(ticks))
242242

243243
return(tickdf)
244244
}

R/annotation-raster.r

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ GeomRasterAnn <- ggproto("GeomRasterAnn", Geom,
7676
stop("annotation_raster only works with Cartesian coordinates",
7777
call. = FALSE)
7878
}
79-
corners <- data.frame(x = c(xmin, xmax), y = c(ymin, ymax))
79+
corners <- new_data_frame(list(x = c(xmin, xmax), y = c(ymin, ymax)), n = 2)
8080
data <- coord$transform(corners, panel_params)
8181

8282
x_rng <- range(data$x, na.rm = TRUE)

R/annotation.r

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ annotate <- function(geom, x = NULL, y = NULL, xmin = NULL, xmax = NULL,
5454
stop("Unequal parameter lengths: ", details, call. = FALSE)
5555
}
5656

57-
data <- data.frame(position)
57+
data <- new_data_frame(position, n = max(lengths))
5858
layer(
5959
geom = geom,
6060
params = list(

R/axis-secondary.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ AxisSecondary <- ggproto("AxisSecondary", NULL,
140140
},
141141

142142
transform_range = function(self, range) {
143-
range <- structure(data.frame(range), names = '.')
143+
range <- new_data_frame(list(. = range))
144144
rlang::eval_tidy(
145145
rlang::f_rhs(self$trans),
146146
data = range,

R/bench.r

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ benchplot <- function(x) {
2323

2424
times <- rbind(construct, build, render, draw)[, 1:3]
2525

26-
plyr::unrowname(data.frame(
26+
plyr::unrowname(base::data.frame(
2727
step = c("construct", "build", "render", "draw", "TOTAL"),
2828
rbind(times, colSums(times))))
2929
}

R/bin.R

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -157,15 +157,14 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0),
157157
xmin = x - width / 2, xmax = x + width / 2) {
158158
density <- count / width / sum(abs(count))
159159

160-
data.frame(
160+
new_data_frame(list(
161161
count = count,
162162
x = x,
163163
xmin = xmin,
164164
xmax = xmax,
165165
width = width,
166166
density = density,
167167
ncount = count / max(abs(count)),
168-
ndensity = density / max(abs(density)),
169-
stringsAsFactors = FALSE
170-
)
168+
ndensity = density / max(abs(density))
169+
), n = length(count))
171170
}

R/coord-map.r

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -248,10 +248,10 @@ CoordMap <- ggproto("CoordMap", Coord,
248248
))
249249
}
250250

251-
x_intercept <- with(panel_params, data.frame(
251+
x_intercept <- with(panel_params, new_data_frame(list(
252252
x = x.major,
253253
y = y.range[1]
254-
))
254+
), n = length(x.major)))
255255
pos <- self$transform(x_intercept, panel_params)
256256

257257
axes <- list(
@@ -272,10 +272,10 @@ CoordMap <- ggproto("CoordMap", Coord,
272272
))
273273
}
274274

275-
x_intercept <- with(panel_params, data.frame(
275+
x_intercept <- with(panel_params, new_data_frame(list(
276276
x = x.range[1],
277277
y = y.major
278-
))
278+
), n = length(y.major)))
279279
pos <- self$transform(x_intercept, panel_params)
280280

281281
axes <- list(

R/coord-munch.r

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ munch_data <- function(data, dist = NULL, segment_length = 0.01) {
6060
id <- c(rep(seq_len(nrow(data) - 1), extra), nrow(data))
6161
aes_df <- data[id, setdiff(names(data), c("x", "y")), drop = FALSE]
6262

63-
plyr::unrowname(data.frame(x = x, y = y, aes_df))
63+
new_data_frame(c(list(x = x, y = y), unclass(aes_df)))
6464
}
6565

6666
# Interpolate.
@@ -171,9 +171,9 @@ find_line_formula <- function(x, y) {
171171
slope <- diff(y) / diff(x)
172172
yintercept <- y[-1] - (slope * x[-1])
173173
xintercept <- x[-1] - (y[-1] / slope)
174-
data.frame(x1 = x[-length(x)], y1 = y[-length(y)],
174+
new_data_frame(list(x1 = x[-length(x)], y1 = y[-length(y)],
175175
x2 = x[-1], y2 = y[-1],
176-
slope = slope, yintercept = yintercept, xintercept = xintercept)
176+
slope = slope, yintercept = yintercept, xintercept = xintercept))
177177
}
178178

179179
# Spiral arc length

R/facet-.r

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ eval_facet <- function(facet, data, env = emptyenv()) {
443443

444444
layout_null <- function() {
445445
# PANEL needs to be a factor to be consistent with other facet types
446-
data.frame(PANEL = factor(1), ROW = 1, COL = 1, SCALE_X = 1, SCALE_Y = 1)
446+
new_data_frame(list(PANEL = factor(1), ROW = 1, COL = 1, SCALE_X = 1, SCALE_Y = 1))
447447
}
448448

449449
check_layout <- function(x) {
@@ -493,12 +493,12 @@ find_panel <- function(table) {
493493
layout <- table$layout
494494
panels <- layout[grepl("^panel", layout$name), , drop = FALSE]
495495

496-
data.frame(
497-
t = min(panels$t),
498-
r = max(panels$r),
499-
b = max(panels$b),
500-
l = min(panels$l)
501-
)
496+
new_data_frame(list(
497+
t = min(.subset2(panels, "t")),
498+
r = max(.subset2(panels, "r")),
499+
b = max(.subset2(panels, "b")),
500+
l = min(.subset2(panels, "l"))
501+
), n = 1)
502502
}
503503
#' @rdname find_panel
504504
#' @export
@@ -526,7 +526,7 @@ panel_rows <- function(table) {
526526
#' @keywords internal
527527
#' @export
528528
combine_vars <- function(data, env = emptyenv(), vars = NULL, drop = TRUE) {
529-
if (length(vars) == 0) return(data.frame())
529+
if (length(vars) == 0) return(new_data_frame())
530530

531531
# For each layer, compute the facet values
532532
values <- compact(plyr::llply(data, eval_facets, facets = vars, env = env))

R/facet-grid-.r

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -232,11 +232,10 @@ FacetGrid <- ggproto("FacetGrid", Facet,
232232
panel <- plyr::id(base, drop = TRUE)
233233
panel <- factor(panel, levels = seq_len(attr(panel, "n")))
234234

235-
rows <- if (!length(names(rows))) 1L else plyr::id(base[names(rows)], drop = TRUE)
236-
cols <- if (!length(names(cols))) 1L else plyr::id(base[names(cols)], drop = TRUE)
235+
rows <- if (!length(names(rows))) rep(1L, length(panel)) else plyr::id(base[names(rows)], drop = TRUE)
236+
cols <- if (!length(names(cols))) rep(1L, length(panel)) else plyr::id(base[names(cols)], drop = TRUE)
237237

238-
panels <- data.frame(PANEL = panel, ROW = rows, COL = cols, base,
239-
check.names = FALSE, stringsAsFactors = FALSE)
238+
panels <- new_data_frame(c(list(PANEL = panel, ROW = rows, COL = cols), base))
240239
panels <- panels[order(panels$PANEL), , drop = FALSE]
241240
rownames(panels) <- NULL
242241

R/facet-null.r

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ FacetNull <- ggproto("FacetNull", Facet,
3030
# Need the is.waive check for special case where no data, but aesthetics
3131
# are mapped to vectors
3232
if (is.waive(data))
33-
return(tibble(PANEL = factor()))
33+
return(new_data_frame(list(PANEL = factor())))
3434

3535
if (empty(data))
36-
return(cbind(data, PANEL = factor()))
36+
return(new_data_frame(c(data, list(PANEL = factor()))))
3737

3838
# Needs to be a factor to be consistent with other facet types
3939
data$PANEL <- factor(1)

R/facet-wrap.r

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ FacetWrap <- ggproto("FacetWrap", Facet,
149149
n <- attr(id, "n")
150150

151151
dims <- wrap_dims(n, params$nrow, params$ncol)
152-
layout <- data.frame(PANEL = factor(id, levels = seq_len(n)))
152+
layout <- new_data_frame(list(PANEL = factor(id, levels = seq_len(n))))
153153

154154
if (params$as.table) {
155155
layout$ROW <- as.integer((id - 1L) %/% dims[2] + 1L)

R/fortify-map.r

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@
2222
#' geom_polygon(aes(group = group), colour = "white")
2323
#' }
2424
fortify.map <- function(model, data, ...) {
25-
df <- as.data.frame(model[c("x", "y")])
26-
names(df) <- c("long", "lat")
27-
df$group <- cumsum(is.na(df$long) & is.na(df$lat)) + 1
28-
df$order <- 1:nrow(df)
25+
df <- new_data_frame(list(
26+
long = model$x,
27+
lat = model$y,
28+
group = cumsum(is.na(model$x) & is.na(model$y)) + 1,
29+
order = seq_along(model$x)
30+
), n = length(model$x))
2931

3032
names <- do.call("rbind", lapply(strsplit(model$names, "[:,]"), "[", 1:2))
3133
df$region <- names[df$group, 1]

R/fortify-multcomp.r

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ NULL
3333
#' @rdname fortify-multcomp
3434
#' @export
3535
fortify.glht <- function(model, data, ...) {
36-
plyr::unrowname(data.frame(
36+
plyr::unrowname(base::data.frame(
3737
lhs = rownames(model$linfct),
3838
rhs = model$rhs,
3939
estimate = stats::coef(model),
@@ -48,7 +48,7 @@ fortify.confint.glht <- function(model, data, ...) {
4848
coef <- model$confint
4949
colnames(coef) <- tolower(colnames(coef))
5050

51-
plyr::unrowname(data.frame(
51+
plyr::unrowname(base::data.frame(
5252
lhs = rownames(coef),
5353
rhs = model$rhs,
5454
coef,
@@ -64,7 +64,7 @@ fortify.summary.glht <- function(model, data, ...) {
6464
model$test[c("coefficients", "sigma", "tstat", "pvalues")])
6565
names(coef) <- c("estimate", "se", "t", "p")
6666

67-
plyr::unrowname(data.frame(
67+
plyr::unrowname(base::data.frame(
6868
lhs = rownames(coef),
6969
rhs = model$rhs,
7070
coef,
@@ -77,7 +77,7 @@ fortify.summary.glht <- function(model, data, ...) {
7777
#' @rdname fortify-multcomp
7878
#' @export
7979
fortify.cld <- function(model, data, ...) {
80-
plyr::unrowname(data.frame(
80+
plyr::unrowname(base::data.frame(
8181
lhs = names(model$mcletters$Letters),
8282
letters = model$mcletters$Letters,
8383
check.names = FALSE,

R/geom-abline.r

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,12 @@ geom_abline <- function(mapping = NULL, data = NULL,
8484
if (!missing(slope) || !missing(intercept)) {
8585
if (missing(slope)) slope <- 1
8686
if (missing(intercept)) intercept <- 0
87+
n_slopes <- max(length(slope), length(intercept))
8788

88-
data <- data.frame(intercept = intercept, slope = slope)
89+
data <- new_data_frame(list(
90+
intercept = intercept,
91+
slope = slope
92+
), n = n_slopes)
8993
mapping <- aes(intercept = intercept, slope = slope)
9094
show.legend <- FALSE
9195
}

R/geom-boxplot.r

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -200,41 +200,42 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom,
200200
outlier.alpha = NULL,
201201
notch = FALSE, notchwidth = 0.5, varwidth = FALSE) {
202202

203-
common <- data.frame(
203+
common <- list(
204204
colour = data$colour,
205205
size = data$size,
206206
linetype = data$linetype,
207207
fill = alpha(data$fill, data$alpha),
208-
group = data$group,
209-
stringsAsFactors = FALSE
208+
group = data$group
210209
)
211210

212-
whiskers <- data.frame(
213-
x = data$x,
214-
xend = data$x,
215-
y = c(data$upper, data$lower),
216-
yend = c(data$ymax, data$ymin),
217-
alpha = NA,
218-
common,
219-
stringsAsFactors = FALSE
220-
)
211+
whiskers <- new_data_frame(c(
212+
list(
213+
x = c(data$x, data$x),
214+
xend = c(data$x, data$x),
215+
y = c(data$upper, data$lower),
216+
yend = c(data$ymax, data$ymin),
217+
alpha = c(NA_real_, NA_real_)
218+
),
219+
common
220+
), n = 2)
221221

222-
box <- data.frame(
223-
xmin = data$xmin,
224-
xmax = data$xmax,
225-
ymin = data$lower,
226-
y = data$middle,
227-
ymax = data$upper,
228-
ynotchlower = ifelse(notch, data$notchlower, NA),
229-
ynotchupper = ifelse(notch, data$notchupper, NA),
230-
notchwidth = notchwidth,
231-
alpha = data$alpha,
232-
common,
233-
stringsAsFactors = FALSE
234-
)
222+
box <- new_data_frame(c(
223+
list(
224+
xmin = data$xmin,
225+
xmax = data$xmax,
226+
ymin = data$lower,
227+
y = data$middle,
228+
ymax = data$upper,
229+
ynotchlower = ifelse(notch, data$notchlower, NA),
230+
ynotchupper = ifelse(notch, data$notchupper, NA),
231+
notchwidth = notchwidth,
232+
alpha = data$alpha
233+
),
234+
common
235+
))
235236

236237
if (!is.null(data$outliers) && length(data$outliers[[1]] >= 1)) {
237-
outliers <- data.frame(
238+
outliers <- new_data_frame(list(
238239
y = data$outliers[[1]],
239240
x = data$x[1],
240241
colour = outlier.colour %||% data$colour[1],
@@ -243,9 +244,8 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom,
243244
size = outlier.size %||% data$size[1],
244245
stroke = outlier.stroke %||% data$stroke[1],
245246
fill = NA,
246-
alpha = outlier.alpha %||% data$alpha[1],
247-
stringsAsFactors = FALSE
248-
)
247+
alpha = outlier.alpha %||% data$alpha[1]
248+
), n = length(data$outliers[[1]]))
249249
outliers_grob <- GeomPoint$draw_panel(outliers, panel_params, coord)
250250
} else {
251251
outliers_grob <- NULL

0 commit comments

Comments
 (0)