Another base R solution:
# Index at which the final run of identical values in `x` begins.
f <- function(x) {
  n <- length(x)
  # Offset, counted from the end, of the first element that differs from the
  # last one; which.max() gives 1 when no element differs.
  k <- which.max(rev(x) != x[n])
  # n - k + 1L is the last differing position. When nothing differs it equals
  # n, and the modulo wraps it to 0 so the result is 1.
  (n - k + 1L) %% n + 1L
}
I'll compare it to the other options and benchmark them. First, some test inputs, including a couple of edge cases (`d` is constant and `e` has no repeated values):
# Inputs that end in a run of repeated values.
a <- c(14, 26, 38, 64, 96, 127, 152, 152, 152, 152, 152, 152)
b <- c(4, 7, 9, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13)
c <- c(62, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297)
# Edge case: every element is the same.
d <- rep(0, 12)
# Edge case: no element is repeated.
e <- seq_len(14L)
Testing the proposed answers, including the edge cases:
# Start index of the last run in `x`, derived from run-length encoding.
get_index <- function(x) {
  run_lengths <- rle(x)$lengths
  # Drop the final run; the remaining lengths add up to the number of
  # elements that precede it.
  leading <- head(run_lengths, -1)
  sum(leading) + 1L
}
# Walk `a` backwards, find the first position where consecutive values change,
# and convert that reversed offset into a forward index.
# When `a` is constant there is no change: min() gets an empty vector, warns
# and returns Inf, so the result is -Inf.
Edward <- function(a) {
  first_change <- min(which(diff(rev(a)) != 0))
  length(a) - first_change + 1L
}
# Index of the first element that equals its successor, or NA if there is none.
first_conseq <- function(x) {
  repeats <- which(diff(x) == 0)
  repeats[1]
}
# Run each candidate over the same inputs; `#>` lines show the printed output.
cases <- list(a, b, c, d, e)
sapply(cases, f)
#> [1] 7 4 2 1 14
sapply(cases, get_index)
#> [1] 7 4 2 1 14
sapply(cases, Edward)
#> Warning in min(which(diff(rev(a)) != 0)): no non-missing arguments to min;
#> returning Inf
#> [1] 7 4 2 -Inf 14
sapply(cases, first_conseq)
#> [1] 7 4 2 1 NA
And SamR's Rcpp function (modified slightly for speed):
# Compile a C++ helper that returns the 1-based index at which the final run of
# identical values in `x` begins. It scans backwards from the end and stops at
# the first element that differs from the last one.
# Returns NA for an empty vector and 1 when every element is the same.
Rcpp::cppFunction('
  int get_index2(const NumericVector& x) {
    const int n = x.size();
    if (n == 0) {
      return NA_INTEGER;  // no last element: x[n - 1] would be out of bounds
    }
    const double last_value = x[n - 1];
    for (int i = n - 2; i >= 0; --i) {
      if (x[i] != last_value) {
        return i + 2;  // +1 as it is next element; +1 for 1-indexing
      }
    }
    return 1;  // all elements are the same
  }
')
# Same edge-case check for the compiled version, which returns one integer.
vapply(list(a, b, c, d, e), get_index2, integer(1))
#> [1] 7 4 2 1 14
Only `f` and the `get_index` functions (`get_index` and `get_index2`) behave well with the edge cases.
Benchmarking with a larger dataset:
# Start positions of the final run, one per test vector. Capped at 1e5 - 1 so
# that 1e5 - n >= 1: sample(0, 1) returns 0, which would merge with the 0L
# sentinel below and shift the final run one position earlier.
n <- sample(1e5 - 1, 1e3, replace = TRUE)
# After the first two elements are dropped each vector has length k: random
# nonzero values, then (for k >= 2) the 0L sentinel at position k - 1, then a
# single nonzero value at position k.
x <- lapply(
  n,
  \(k) c(sample(1e4, k, replace = TRUE), 0L, sample(1e5 - k, 1))[-(1:2)]
)
# Sanity check: `f` must recover every start position stored in `n`.
identical(vapply(x, f, integer(1)), n)
#> [1] TRUE
# Time each implementation over the full list of test vectors.
bench::mark(
  f = vapply(x, f, 0L),
  get_index = vapply(x, get_index, 0L),
  get_index2 = vapply(x, get_index2, 0L)
)
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 f 306.6ms 316.29ms 3.16 580.07MB 12.6
#> 2 get_index 2.46s 2.46s 0.406 4.91GB 13.8
#> 3 get_index2 62.4ms 67.14ms 14.6 404.34MB 42.0