From 3990670ccb70afb71ab70d93bbd7fa4f945ae110 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 21 Aug 2025 21:21:14 +0200 Subject: [PATCH 01/13] allow showProgress=INTEGER to set progress bar update time --- R/data.table.R | 6 +++--- src/dogroups.c | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index c64c391b33..f89f6c33e5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -147,7 +147,7 @@ replace_dot_alias = function(e) { } } -"[.data.table" = function(x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0.0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL, showProgress=getOption("datatable.showProgress", interactive())) +"[.data.table" = function(x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0.0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL, showProgress=as.integer(getOption("datatable.showProgress", interactive()))) { # ..selfcount <<- ..selfcount+1 # in dev, we check no self calls, each of which doubles overhead, or could # test explicitly if the caller is [.data.table (even stronger test. TO DO.) @@ -244,7 +244,7 @@ replace_dot_alias = function(e) { if ((isTRUE(which)||is.na(which)) && !missing(j)) stopf("which==%s (meaning return row numbers) but j is also supplied. Either you need row numbers or the result of j, but only one type of result can be returned.", which) if (is.null(nomatch) && is.na(which)) stopf("which=NA with nomatch=0|NULL would always return an empty vector. 
Please change or remove either which or nomatch.") if (!with && missing(j)) stopf("j must be provided when with=FALSE") - if (!missing(by) && !isTRUEorFALSE(showProgress)) stopf("%s must be TRUE or FALSE", "showProgress") + if (!missing(by) && !is.numeric(showProgress)) stopf("%s must be numeric.", "showProgress") irows = NULL # Meaning all rows. We avoid creating 1:nrow(x) for efficiency. notjoin = FALSE rightcols = leftcols = integer() @@ -1934,7 +1934,7 @@ replace_dot_alias = function(e) { } ans = c(g, ans) } else { - ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose, showProgress) + ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose, as.integer(showProgress)) } # unlock any locked data.table components of the answer, #4159 # MAX_DEPTH prevents possible infinite recursion from truly recursive object, #4173 diff --git a/src/dogroups.c b/src/dogroups.c index d4f774568b..43d1e97fae 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -87,9 +87,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX SEXP SDall = PROTECT(findVar(install(".SDall"), env)); nprotect++; // PROTECT for rchk SEXP SD = PROTECT(findVar(install(".SD"), env)); nprotect++; - const bool showProgress = LOGICAL(showProgressArg)[0]==1 && ngrp > 1; // showProgress only if more than 1 group + int updateTime = INTEGER(showProgressArg)[0]; + const bool showProgress = updateTime > 0 && ngrp > 1; // showProgress only if more than 1 group double startTime = (showProgress) ? wallclock() : 0; // For progress printing, startTime is set at the beginning - double nextTime = (showProgress) ? startTime+3 : 0; // wait 3 seconds before printing progress + double nextTime = (showProgress) ? 
startTime + MAX(updateTime, 3) : 0; // wait at least 3 seconds before printing progress defineVar(sym_BY, BY = PROTECT(allocVector(VECSXP, ngrpcols)), env); nprotect++; // PROTECT for rchk SEXP bynames = PROTECT(allocVector(STRSXP, ngrpcols)); nprotect++; // TO DO: do we really need bynames, can we assign names afterwards in one step? @@ -456,7 +457,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX Rprintf(_("Processed %d groups out of %d. %.0f%% done. Time elapsed: %ds. ETA: %ds."), i+1, ngrp, 100.0*(i+1)/ngrp, (int)(now-startTime), ETA); // # nocov end } - nextTime = now+1; + nextTime = now+updateTime; hasPrinted = true; } ansloc += maxn; From 0d0250896dd0a703fc6959b9ce92917103dc83cd Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 4 Jan 2026 22:47:27 +0100 Subject: [PATCH 02/13] add NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 23e8d5c873..d3b0461d92 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,8 @@ 1. `nafill()`, `setnafill()` extended to work on logical vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR. +2. `showProgress` argument and `options("datatable.showProgress")` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). 
From 6efaf3723570ad1a436451de125db827b636c0ad Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 4 Jan 2026 22:48:21 +0100 Subject: [PATCH 03/13] cleanup merge --- src/dogroups.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/dogroups.c b/src/dogroups.c index 4ed7649268..37c16683f3 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -85,14 +85,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // fix for longstanding FR/bug, #495. E.g., DT[, c(sum(v1), lapply(.SD, mean)), by=grp, .SDcols=v2:v3] resulted in error.. the idea is, 1) we create .SDall, which is normally == .SD. But if extra vars are detected in jexp other than .SD, then .SD becomes a shallow copy of .SDall with only .SDcols in .SD. Since internally, we don't make a copy, changing .SDall will reflect in .SD. Hopefully this'll workout :-). SEXP SDall = PROTECT(findVar(install(".SDall"), env)); nprotect++; // PROTECT for rchk SEXP SD = PROTECT(findVar(install(".SD"), env)); nprotect++; -<<<<<<< HEAD - + int updateTime = INTEGER(showProgressArg)[0]; const bool showProgress = updateTime > 0 && ngrp > 1; // showProgress only if more than 1 group -======= - - const bool showProgress = LOGICAL(showProgressArg)[0]==1 && ngrp > 1; // showProgress only if more than 1 group ->>>>>>> master double startTime = (showProgress) ? wallclock() : 0; // For progress printing, startTime is set at the beginning double nextTime = (showProgress) ? startTime + MAX(updateTime, 3) : 0; // wait at least 3 seconds before printing progress From 3147568a913c9355ba462f2efdc73b3fed1f0ad7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 4 Jan 2026 22:51:59 +0100 Subject: [PATCH 04/13] refine NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index d3b0461d92..33e3c1f68a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,7 +16,7 @@ 1. 
`nafill()`, `setnafill()` extended to work on logical vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR. -2. `showProgress` argument and `options("datatable.showProgress")` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. +2. `[,showProgress` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. ### Notes From dc872f21dee9c9e070d01cd2c3791d1641daad7d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 4 Jan 2026 22:59:19 +0100 Subject: [PATCH 05/13] make check more explicit --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index cdfc66ff14..9920181311 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -244,7 +244,7 @@ replace_dot_alias = function(e) { if ((isTRUE(which)||is.na(which)) && !missing(j)) stopf("which==%s (meaning return row numbers) but j is also supplied. Either you need row numbers or the result of j, but only one type of result can be returned.", which) if (is.null(nomatch) && is.na(which)) stopf("which=NA with nomatch=0|NULL would always return an empty vector. 
Please change or remove either which or nomatch.") if (!with && missing(j)) stopf("j must be provided when with=FALSE") - if (!missing(by) && !is.numeric(showProgress)) stopf("%s must be numeric.", "showProgress") + if (!missing(by) && !((isTRUEorFALSE(showProgress) || (is.numeric(showProgress) && length(showProgress)==1L && showProgress >= 0)))) stopf("showProgress must be TRUE, FALSE, or a single non-negative number") # nocov irows = NULL # Meaning all rows. We avoid creating 1:nrow(x) for efficiency. notjoin = FALSE rightcols = leftcols = integer() From b3e01665c03fdb6e3f43ea79239fd5595ee96c6c Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 4 Jan 2026 23:00:54 +0100 Subject: [PATCH 06/13] adjust docs --- man/data.table.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/data.table.Rd b/man/data.table.Rd index a674ecccb0..3b5f299ceb 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -36,7 +36,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac verbose = getOption("datatable.verbose"), # default: FALSE allow.cartesian = getOption("datatable.allow.cartesian"), # default: FALSE drop = NULL, on = NULL, env = NULL, - showProgress = getOption("datatable.showProgress", interactive())) + showProgress = as.integer(getOption("datatable.showProgress", interactive()))) } \arguments{ \item{\dots}{ Just as \code{\dots} in \code{\link{data.frame}}. 
Usual recycling rules are applied to vectors of different lengths to create a list of equal length vectors.} From 08fc8252e1d261b7b4714d91a8e89a416d20660e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 4 Jan 2026 23:35:11 +0100 Subject: [PATCH 07/13] adjust nocov to what they really should cover --- src/dogroups.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dogroups.c b/src/dogroups.c index 37c16683f3..cfb2957326 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -452,17 +452,17 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // could potentially refactor to use fread's progress() function, however we would lose some information in favor of simplicity. double now; if (showProgress && (now=wallclock())>=nextTime) { + // # nocov start. Requires long-running test case double avgTimePerGroup = (now-startTime)/(i+1); int ETA = (int)(avgTimePerGroup*(ngrp-i-1)); if (hasPrinted || ETA >= 0) { - // # nocov start. Requires long-running test case if (verbose && !hasPrinted) Rprintf(_("\n")); Rprintf("\r"); // # notranslate. \r is not internationalizable Rprintf(_("Processed %d groups out of %d. %.0f%% done. Time elapsed: %ds. 
ETA: %ds."), i+1, ngrp, 100.0*(i+1)/ngrp, (int)(now-startTime), ETA); - // # nocov end } nextTime = now+updateTime; hasPrinted = true; + // # nocov end } ansloc += maxn; if (firstalloc) { From 6ec0a837bf981691ef2a6e46b9aaed511120ad3d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 8 Jan 2026 14:17:19 +0100 Subject: [PATCH 08/13] phrasing comment Co-authored-by: Michael Chirico --- src/dogroups.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dogroups.c b/src/dogroups.c index cfb2957326..d5b9f01bee 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -89,7 +89,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX int updateTime = INTEGER(showProgressArg)[0]; const bool showProgress = updateTime > 0 && ngrp > 1; // showProgress only if more than 1 group double startTime = (showProgress) ? wallclock() : 0; // For progress printing, startTime is set at the beginning - double nextTime = (showProgress) ? startTime + MAX(updateTime, 3) : 0; // wait at least 3 seconds before printing progress + double nextTime = (showProgress) ? startTime + MAX(updateTime, 3) : 0; // wait at least 3 seconds before starting to print progress hashtab * specials = hash_create(3 + ngrpcols + xlength(SDall)); // .I, .N, .GRP plus columns of .BY plus SDall PROTECT(specials->prot); nprotect++; From f9b76f33788532c9b240a863ba7a9c1bbd8d1032 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 8 Jan 2026 14:19:23 +0100 Subject: [PATCH 09/13] Update NEWS.md Co-authored-by: Michael Chirico --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 33e3c1f68a..c6a410bba1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,7 +16,7 @@ 1. `nafill()`, `setnafill()` extended to work on logical vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). 
Thanks @jangorecki for the request and @MichaelChirico for the PR. -2. `[,showProgress` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. +2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; logical TRUE behaves as before, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. ### Notes From fda47aac898ca38651a4ea9cbde404e076440a8f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 8 Jan 2026 14:23:52 +0100 Subject: [PATCH 10/13] refine NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c6a410bba1..2930f2311a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,7 +16,7 @@ 1. `nafill()`, `setnafill()` extended to work on logical vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR. -2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; logical TRUE behaves as before, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. +2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; `TRUE` behaves as before (first update after 3 seconds, then one update per second), [#6514](https://github.com/Rdatatable/data.table/issues/6514).
Thanks @ethanbsmith for the report and @ben-schwen for the PR. ### Notes From 3579fb221d8ca2d49f19317ff526b9e84246aac1 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 8 Jan 2026 14:26:46 +0100 Subject: [PATCH 11/13] update man --- man/data.table.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/data.table.Rd b/man/data.table.Rd index 3b5f299ceb..acb8f24a7b 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -181,7 +181,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \item{env}{ List or an environment, passed to \code{\link{substitute2}} for substitution of parameters in \code{i}, \code{j} and \code{by} (or \code{keyby}). Use \code{verbose} to preview constructed expressions. For more details see \href{../doc/datatable-programming.html}{\code{vignette("datatable-programming")}}. } - \item{showProgress}{ \code{TRUE} shows progress indicator with estimated time to completion for lengthy "by" operations. } + \item{showProgress}{ \code{TRUE} (default when \code{interactive()}) shows a progress indicator with estimated time to completion for lengthy "by" operations; the first update is printed after 3 seconds and later updates follow every second. An integer value sets the update interval in seconds; the first update is never printed sooner than 3 seconds after the start. \code{FALSE} (or \code{0}) disables the progress indicator.
} } \details{ \code{data.table} builds on base \R functionality to reduce 2 types of time:\cr From df8beabacf5d35a38e5860bb363d149180b76c64 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 8 Jan 2026 14:28:47 +0100 Subject: [PATCH 12/13] remove as.integer from signature --- R/data.table.R | 2 +- man/data.table.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 9920181311..f9baf62c3d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -147,7 +147,7 @@ replace_dot_alias = function(e) { } } -"[.data.table" = function(x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0.0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL, showProgress=as.integer(getOption("datatable.showProgress", interactive()))) +"[.data.table" = function(x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0.0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL, showProgress=getOption("datatable.showProgress", interactive())) { # ..selfcount <<- ..selfcount+1 # in dev, we check no self calls, each of which doubles overhead, or could # test explicitly if the caller is [.data.table (even stronger test. TO DO.) 
diff --git a/man/data.table.Rd b/man/data.table.Rd index acb8f24a7b..cfdcb27068 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -36,7 +36,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac verbose = getOption("datatable.verbose"), # default: FALSE allow.cartesian = getOption("datatable.allow.cartesian"), # default: FALSE drop = NULL, on = NULL, env = NULL, - showProgress = as.integer(getOption("datatable.showProgress", interactive()))) + showProgress = getOption("datatable.showProgress", interactive())) } \arguments{ \item{\dots}{ Just as \code{\dots} in \code{\link{data.frame}}. Usual recycling rules are applied to vectors of different lengths to create a list of equal length vectors.} From ee671af24b30decc8661cd2440f024b4f22b0bfa Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 8 Jan 2026 14:34:43 +0100 Subject: [PATCH 13/13] remove unnecessary brackets --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index f9baf62c3d..1ce6b62cf5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -244,7 +244,7 @@ replace_dot_alias = function(e) { if ((isTRUE(which)||is.na(which)) && !missing(j)) stopf("which==%s (meaning return row numbers) but j is also supplied. Either you need row numbers or the result of j, but only one type of result can be returned.", which) if (is.null(nomatch) && is.na(which)) stopf("which=NA with nomatch=0|NULL would always return an empty vector. 
Please change or remove either which or nomatch.") if (!with && missing(j)) stopf("j must be provided when with=FALSE") - if (!missing(by) && !((isTRUEorFALSE(showProgress) || (is.numeric(showProgress) && length(showProgress)==1L && showProgress >= 0)))) stopf("showProgress must be TRUE, FALSE, or a single non-negative number") # nocov + if (!missing(by) && !(isTRUEorFALSE(showProgress) || (is.numeric(showProgress) && length(showProgress)==1L && showProgress >= 0))) stopf("showProgress must be TRUE, FALSE, or a single non-negative number") # nocov irows = NULL # Meaning all rows. We avoid creating 1:nrow(x) for efficiency. notjoin = FALSE rightcols = leftcols = integer()