Specification: for each data point, compute average of data points within last 7 days
Example input/output:
Input:
data| time | x |
|---|---|
20 | 14.5 |
3 | 3 |
1 | 1 |
9 | 7 |
10 | 11 |
11 | 12 |
Output:
end_time average 11329510911102014.5
Python - Imperative
def rolling_average(data):
data.sort(key=lambda v: v["time"])
result = []
for i, value in enumerate(data):
end = value["time"]
total, count = 0.0, 0
for j in range(i, -1, -1):
if data[j]["time"] <= end - 7:
break
total += data[j]["x"]; count += 1
result.append(
{"end_time": end,
"average": total / count}
)
return resultPython - Functional
def rolling_average(data):
return [
{"end_time": x["time"],
"average": sum(vs) / len(vs)}
for x in data
for vs in [
[y["x"] for y in data
if y["time"] <= x["time"] and
y["time"] > x["time"] - 7]
]
]Python - Pandas
def rolling_average(data):
d = data.copy()
data.time = pd.to_datetime(data.time * 10**9)
data = (data.sort_values('time').set_index('time')
.rolling(window='7s').mean())
return pd.DataFrame.from_dict(
{'end_time': d.sort_values('time').reset_index().time,
'average': data.reset_index().x}
)R - Tidyverse
library(slider)
rolling_average <- function(data) {
data <- arrange(data, time)
avgs <- unlist(slide_index(data$x, data$time, ~ mean(.x), .before = 6))
data %>% mutate(end_time = time, average = avgs) %>% select(end_time, average)
}SQL - SQLite
SELECT end.time as end_time, AVG(other.x) as average FROM data as end JOIN data as other ON other.time <= end.time and other.time > end.time - 7 GROUP BY end.time
Datalog - Souffle
.decl window(end_time: number, time: number)
window(end_time, t) :-
data(end_time, _), data(t, _),
t <= end_time, t > end_time - 7.
.decl bucket(end_time: number, total: float, n: float)
bucket(end_time, total, n) :- data(end_time, _),
total = sum v : {data(t, v), window(end_time, t)},
n = sum z : {data(t, _), window(end_time, t), z = 1.0}.
rolling_average(end_time, v) :-
bucket(end_time, total, n), v = total / n.Q - kdb+
get_avg:
{[t] (select avg(x) from data where time within (t - 6; t)) `x};
rolling_average:
select end_time: time, average: get_avg'[time] from data