Specification: for each data point, compute average of data points within last 7 days
Example input/output:
Input:
data
time | x |
---|---|
20 | 14.5 |
3 | 3 |
1 | 1 |
9 | 7 |
10 | 11 |
11 | 12 |
Output:
end_time average 1
1
3
2
9
5
10
9
11
10
20
14.5
Python - Imperative
def rolling_average(data):
data.sort(key=lambda v: v["time"])
result = []
for i, value in enumerate(data):
end = value["time"]
total, count = 0.0, 0
for j in range(i, -1, -1):
if data[j]["time"] <= end - 7:
break
total += data[j]["x"]; count += 1
result.append(
{"end_time": end,
"average": total / count}
)
return result
Python - Functional
def rolling_average(data):
return [
{"end_time": x["time"],
"average": sum(vs) / len(vs)}
for x in data
for vs in [
[y["x"] for y in data
if y["time"] <= x["time"] and
y["time"] > x["time"] - 7]
]
]
Python - Pandas
def rolling_average(data):
d = data.copy()
data.time = pd.to_datetime(data.time * 10**9)
data = (data.sort_values('time').set_index('time')
.rolling(window='7s').mean())
return pd.DataFrame.from_dict(
{'end_time': d.sort_values('time').reset_index().time,
'average': data.reset_index().x}
)
R - Tidyverse
library(slider)
rolling_average <- function(data) {
data <- arrange(data, time)
avgs <- unlist(slide_index(data$x, data$time, ~ mean(.x), .before = 6))
data %>% mutate(end_time = time, average = avgs) %>% select(end_time, average)
}
SQL - SQLite
SELECT
end.time as end_time, AVG(other.x) as average
FROM data as end
JOIN data as other
ON other.time <= end.time and
other.time > end.time - 7
GROUP BY end.time
Datalog - Souffle
.decl window(end_time:number,time:number)
window(end_time,t):-
data(end_time,_),data(t,_),
t<=end_time,t>end_time7.
.decl bucket(end_time:number,total:float,n:float)
bucket(end_time,total,n):-data(end_time,_),
total=sumvdata(t,v),window(end_time,t),
n=sumzdata(t,_),window(end_time,t),z=1.0.
rolling_average(end_time,v):-
bucket(end_time,total,n),v=totaln.
Q - kdb+
get_avg:
{[t] (select avg(x) from data where time within (t - 6; t)) `x};
rolling_average:
select end_time: time, average: get_avg'[time] from data