Question

multiple times addition of two columns with dplyr

Assume I have a tibble (in my example here only with four columns). In real I have two hourly time series therefore 2 times 24 columns:

s1.x,...,s24.x, s1.y,...,s24.y

Now I want to sum up column s1.x with s1.y, s2.x with s2.y to s24.x with s24.y.

a <- tibble(s1.x=2:7, s2.x=3:8, s1.y=4:9, s2.y=5:10)

a %>% 
  mutate(s1.tot=s1.x+s1.y, s2.tot=s2.x+s2.y)

How can I do this in a short (and elegant way) for all 24 hours?

2 112 2

1 Jan 1970

Solution

A base R solution

cbind(
    a,
    lapply(
        split.default(a, sub("\\..*", ".tot", names(a))),
        rowSums
    )
)

or even shorter

cbind(a, t(rowsum(t(a), sub("\\..*", ".tot", names(a)))))

which gives

  s1.x s2.x s1.y s2.y s1.tot s2.tot
1    2    3    4    5      6      8
2    3    4    5    6      8     10
3    4    5    6    7     10     12
4    5    6    7    8     12     14
5    6    7    8    9     14     16
6    7    8    9   10     16     18

Benchmarking

If you are care about the speed

edward <- function() {
    a %>%
        mutate(id = 1:nrow(a)) %>%
        pivot_longer(-id,
            names_to = c("s", ".value"),
            names_pattern = "(s\\d+).(.)"
        ) |>
        mutate(tot = x + y, .by = id) |>
        pivot_wider(names_from = s, values_from = c(x, y, tot), names_glue = "{s}.{.value}") %>%
        select(-id)
}


mael <- function() {
    a |>
        mutate(across(matches(".x$"), .names = "{gsub('x', '', .col)}tot") +
            across(matches(".y$")))
}

tic1 <- function() {
    cbind(
        a,
        lapply(
            split.default(a, sub("\\..*", ".tot", names(a))),
            rowSums
        )
    )
}

tic2 <- function() {
    cbind(a, t(rowsum(t(a), sub("\\..*", ".tot", names(a)))))
}

microbenchmark(
    edward(),
    mael(),
    tic1(),
    tic2(),
    unit = "relative",
    check = "equivalent"
)

shows

Unit: relative
     expr       min        lq      mean    median        uq      max neval
 edward() 43.286275 33.290854 29.937941 32.085324 30.763830 8.997682   100
   mael()  8.465686  7.081354  6.652635  6.930750  6.867252 2.572953   100
   tic1()  1.673039  1.469692  1.527453  1.417756  1.315485 1.425672   100
   tic2()  1.000000  1.000000  1.000000  1.000000  1.000000 1.000000   100

2024-07-23

ThomasIsCoding

Solution

I would do it with two across in this case, although pivoting might be a better option for more complex operations.

library(dplyr)
a |> 
  mutate(across(matches(".x$"), .names = "{gsub('x', '', .col)}tot") + 
           across(matches(".y$")))

#   s1.x s2.x s1.y s2.y s1.tot s2.tot
# 1    2    3    4    5      6      8
# 2    3    4    5    6      8     10
# 3    4    5    6    7     10     12
# 4    5    6    7    8     12     14
# 5    6    7    8    9     14     16
# 6    7    8    9   10     16     18

2024-07-23

Maël

Solution

One way is to pivot. But we need to add an id column to do the sum.

a %>% 
  mutate(id=1:nrow(a)) %>%
  pivot_longer(-id,
               names_to=c("s", ".value"),
               names_pattern="(s\\d+).(.)") |>
  mutate(tot=x+y, .by=id) |>
  pivot_wider(names_from=s, values_from=c(x,y,tot), names_glue="{s}.{.value}")

# A tibble: 6 × 7
     id  s1.x  s2.x  s1.y  s2.y s1.tot s2.tot
  <int> <int> <int> <int> <int>  <int>  <int>
1     1     2     3     4     5      6      8
2     2     3     4     5     6      8     10
3     3     4     5     6     7     10     12
4     4     5     6     7     8     12     14
5     5     6     7     8     9     14     16
6     6     7     8     9    10     16     18

2024-07-23

Edward

Solution

You can use Map function:

n = 2      # for your data, use n = 24
a[paste0("s", 1:n, ".tot")] = Map(`+`, a[paste0("s", 1:n, ".x")], a[paste0("s", 1:n, ".y")])

# A tibble: 6 × 6
   s1.x  s2.x  s1.y  s2.y s1.tot s2.tot
  <int> <int> <int> <int>  <int>  <int>
1     2     3     4     5      6      8
2     3     4     5     6      8     10
3     4     5     6     7     10     12
4     5     6     7     8     12     14
5     6     7     8     9     14     16
6     7     8     9    10     16     18

2024-07-23

B. Christian Kamgang

Solution

Here's a general tidyverse solution - 24 hours, 7 days (series), 5 observations example.

# Pkgs and seed -----------------------------------------------------------
library(tidyverse)
set.seed(100)

# Toy data - 24 hours a day, 7 days, 5 observations -----------------------
my_df <- paste0(rep(paste0("s", str_pad(1:24, width = 2, pad = "0")), 7), ".", rep(letters[19:25], each = 24))
my_df <- as_tibble(matrix(sample(0:10, (7*24)* 5, replace = TRUE), nrow = 5, dimnames = list(rep(NA, 5), my_df)))

# code - Creates a list and reduces it ------------------------------------
new_df <- reduce(map(
  str_unique(str_extract(colnames(my_df), ".$")),
  \(i) rename_with(select(my_df, ends_with(i)), \(j) str_replace(j, paste0(i, "$"), "tot"))), 
  `+`)

# Output ------------------------------------------------------------------
select(my_df,contains("01"))
#> # A tibble: 5 × 7
#>   s01.s s01.t s01.u s01.v s01.w s01.x s01.y
#>   <int> <int> <int> <int> <int> <int> <int>
#> 1     9     9    10     0     4     2     2
#> 2     6     1     4     2     3     3     3
#> 3     5     4     4     9    10     5     8
#> 4     2     9     4     4     9     0     3
#> 5     8     3     9     2     8    10     2

select(new_df,contains("01"))
#>   s01.tot
#> 1      36
#> 2      22
#> 3      45
#> 4      31
#> 5      42

^{Created on 2024-07-23 with reprex v2.1.0}

2024-07-23

Adriano Mello