From 01865974ca3a241e4f6d8a9d81670527d3fe50bb Mon Sep 17 00:00:00 2001 From: Leonidas Zhak <70497898+LeonidasZhak@users.noreply.github.com> Date: Sat, 6 Jun 2026 20:30:41 +0800 Subject: [PATCH 1/2] docs: add ordering note to shift.Rd, character example to nafill.Rd - shift.Rd: Add note explaining that shift operates on row position, not time order. Critical gotcha for Stata migrants who expect L.var behavior (automatic time-ordering after xtset). Add explicit example showing the WRONG vs RIGHT approach with unsorted data. - nafill.Rd: Add character vector example (locf + const fill). Character was listed as supported type but had no example. --- man/nafill.Rd | 5 +++++ man/shift.Rd | 17 +++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/man/nafill.Rd b/man/nafill.Rd index 90c4b1c5c6..08ed420936 100644 --- a/man/nafill.Rd +++ b/man/nafill.Rd @@ -42,6 +42,11 @@ x = gl(3, 2, 10) is.na(x) = 1:2 nafill(x, "nocb") +# works for character +chr = c("a", NA, "b", NA, "c") +nafill(chr, "locf") +nafill(chr, "const", fill="z") + # fill= applies to any leftover NA nafill(c(NA, x), "locf") nafill(c(NA, x), "locf", fill=0) diff --git a/man/shift.Rd b/man/shift.Rd index 7815b32849..2eab31c2e1 100644 --- a/man/shift.Rd +++ b/man/shift.Rd @@ -27,6 +27,8 @@ shift(x, n=1L, fill, type=c("lag", "lead", "shift", "cyclic"), give.names=FALSE) \code{shift} is designed mainly for use in data.tables along with \code{:=} or \code{set}. Therefore, it returns an unnamed list by default as assigning names for each group over and over can be quite time consuming with many groups. It may be useful to set names automatically in other cases, which can be done by setting \code{give.names} to \code{TRUE}. Note that when using \code{shift} with a list, it should be a list of lists rather than a flattened list. The function was not designed to handle flattened lists directly. This also applies to the use of list columns in a data.table. 
For example, \code{DT = data.table(x=as.list(1:4))} is a data.table with four rows. Applying \code{DT[, shift(x)]} now lags every entry individually, rather than shifting the full columns like \code{DT[, shift(as.integer(x))]} does. Using \code{DT = data.table(x=list(1:4))} creates a data.table with one row. Now \code{DT[, shift(x)]} returns a data.table with four rows where x is lagged. To get a shifted data.table with the same number of rows, wrap the \code{shift} function in \code{list} or \code{dot}, e.g., \code{DT[, .(shift(x))]}. + + \code{shift} operates on positional order (row order), not on any inherent time ordering. For time-series or panel data, the data must be sorted by the time variable \emph{before} calling \code{shift}; otherwise the lag/lead will be computed over the wrong observations. Use \code{DT[order(timevar), shift(x), by=groupvar]} to lag within groups respecting time order. Users migrating from Stata should note that Stata's \code{L.var} (after \code{xtset}) automatically respects time order within panels, whereas \code{shift} requires explicit sorting. } \value{ A list containing the lead/lag of input \code{x}. 
@@ -57,14 +59,13 @@ DT[, (anscols) := shift(.SD, 1, 0, "lead"), .SDcols=cols] DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5]) DT[, shift(.SD, 1:2, NA, "lead", TRUE), .SDcols=2:4] -# lag/lead in the right order -DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5]) -DT = DT[sample(nrow(DT))] -# add lag=1 for columns 'v1,v2,v3' in increasing order of 'year' -cols = c("v1","v2","v3") -anscols = paste("lag", cols, sep="_") -DT[order(year), (cols) := shift(.SD, 1, type="lag"), .SDcols=cols] -DT[order(year)] +# shift operates on row position, not time order +DT = data.table(year=c(2012, 2010, 2011), v1=c(30, 10, 20)) +# WRONG: lag by row position (2012's value becomes lag of 2010) +DT[, lag_wrong := shift(v1, 1L)] +# RIGHT: sort by year first, then lag +DT[order(year), lag_right := shift(v1, 1L)] +DT # while grouping DT = data.table(year=rep(2010:2011, each=3), v1=1:6) From e2f84d4a3080fc83a39333b92ce042dc35086623 Mon Sep 17 00:00:00 2001 From: Leonidas Zhak <70497898+LeonidasZhak@users.noreply.github.com> Date: Sat, 6 Jun 2026 21:46:42 +0800 Subject: [PATCH 2/2] docs: add panel data example to shift.Rd for Stata migrants Add a panel data example showing the correct pattern for lagging within groups respecting time order. This is the most common use case for Stata users migrating to R, where they would use: xtset firm year gen lag_sales = L.sales The example demonstrates the equivalent data.table pattern: DT[order(firm, year), lag_sales := shift(sales, 1L), by = firm] This complements the existing WRONG/RIGHT single-entity example by showing the multi-entity panel data case with by=. 
--- man/shift.Rd | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/man/shift.Rd b/man/shift.Rd index 2eab31c2e1..6262a1c4b7 100644 --- a/man/shift.Rd +++ b/man/shift.Rd @@ -67,6 +67,17 @@ DT[, lag_wrong := shift(v1, 1L)] DT[order(year), lag_right := shift(v1, 1L)] DT +# panel data: lag within groups respecting time order +# (equivalent to Stata's: xtset firm year; gen lag_sales = L.sales) +DT = data.table( + firm = rep(c("A", "B"), each = 3), + year = rep(2010:2012, 2), + sales = c(100, 110, 125, 200, 215, 230) +) +# sort by firm + year, then lag sales within each firm +DT[order(firm, year), lag_sales := shift(sales, 1L), by = firm] +DT + # while grouping DT = data.table(year=rep(2010:2011, each=3), v1=1:6) DT[, c("lag1", "lag2") := shift(.SD, 1:2), by=year]