This benchmark is a work-in-progress dataset of programs implementing tabular data analytics tasks, similar to Rosetta Code or 7GUIs. This benchmark has two goals:

  1. Assist in cross-language learning. This benchmark provides an interface for easily comparing programs that do the same task in many languages.
  2. Quantify the relationship between programs and tasks. A preliminary analysis of program size compares the relative conciseness of each language.
Click on a task nameorClick on a languageorRead the analysis
Note: this site is optimized for large screens. Try it when you get the chance.
CategoryTask namePython - ImperativePython - FunctionalPython - PandasR - TidyverseSQL - SQLiteDatalog - SouffleQ - kdb+
AggregationContinent with the highest average population
def continent_by_population(countries):
  continent_stats = defaultdict(lambda: [0, 0])
  for country in countries:
    continent = country['continent']
    continent_stats[continent][0] += country['population']
    continent_stats[continent][1] += 1
     
  max_continent = None
  max_average = None
  for continent, [total, count] in continent_stats.items():
    average = total / count
    if max_average is None or max_average < average:
      max_average = average
      max_continent = continent
      
  return max_continent
def continent_by_population(countries):
  continents = set([c['continent'] for c in countries])
  populations_by_continent = [
    (continent, [c['population'] for c in countries 
                 if c['continent'] == continent])
    for continent in continents
  ]
  averages = [
    (continent, sum(pops) / len(pops))
    for continent, pops in populations_by_continent
  ]
  return max(averages, key=lambda t: t[1])[0]
def continent_by_population(countries):
  mean_pop = countries.groupby('continent').population.mean()
  return mean_pop.index[mean_pop.argmax()]
continent_by_population <- function(countries) {
  countries %>%
    group_by(continent) %>%
    summarize(mean_pop = mean(population)) %>%
    slice(which.max(mean_pop)) %>%
    .$continent
}
SELECT continent 
FROM countries
GROUP BY continent
ORDER BY AVG(population) DESC
LIMIT 1
.decl average_population(Continent:symbol, Avg:number)
average_population(Continent, Avg) :-
  countries(Continent, _, _),
  Total = sum P : countries(Continent, _, P),
  Num_countries = count : countries(Continent, _, _),
  Avg = Total / Num_countries.
  
continent_by_population(Continent) :- 
  countries(Continent, _, _), 
  average_population(Continent, Max_avg),
  Max_avg = max A : { countries(C, _, _), average_population(C, A) }.
averages: 
  select avg(population) by continent from countries;
continent_by_population: 
  (first select[>population] continent from averages) `continent
First element that doesn't change mean

def get_mean(ls):
  sum = 0
  for e in ls:
    sum += e['value']
  return sum/len(ls)

def changing_mean(vals):
  start_mean = get_mean(vals)
  for i,elem in enumerate(vals):
    new_ls = vals[:i] + vals[i+1:]
    mean = get_mean(new_ls)
    if abs(mean - start_mean) < 0.1:
      return i
  return None

def changing_mean(vals):
  start_mean = sum([l['value'] for l in vals])/len(vals)
  diff = lambda m, sm: abs(m - sm)
    
  for i,elem in enumerate(vals):
    new_ls = [x['value'] for x in vals if x != elem]
    mean = sum(new_ls)/len(new_ls)
    if diff(mean,start_mean) < 0.1:
      return i
  return None

def changing_mean(vals):
  mean = vals.value.mean()
  mean_without = vals.apply(lambda row: vals[vals.id != row.id].value.mean(), axis=1)
  return vals[(mean_without - mean).abs() < 0.1].id.tolist()
library(data.table)
changing_mean <- function(vals) {
   global_mean <- mean(vals$value)
   setDT(vals)[, 
     mean_change := abs(global_mean - mean(vals[id != .BY, value])), 
     by = id] %>%
     filter(mean_change < 0.1) %>%
     slice(which.min(id)) %>%
     pull(id)
}
SELECT id
FROM vals v1
WHERE ABS((
  SELECT AVG(value)
  FROM vals v2
  WHERE v1.id != v2.id
) - (SELECT AVG(value) FROM vals)) < 0.1
.decl avg_except(I:number, Avg:float)    
avg_except(I, Avg) :- 
  vals(I, _),
  N = sum 1.0 : { vals(J, _), I != J },
  Total = sum V : { vals(J, V), I != J },
  Avg = Total / N.
  
changing_mean(I) :-
  vals(I, _),
  N = sum 1.0 : vals(_, _),
  Total = sum V : vals(_, V),
  Avg = Total / N,
  avg_except(I, AvgExcept),
  ((Avg > AvgExcept, Avg - AvgExcept < 0.1);
   (Avg < AvgExcept, AvgExcept - Avg < 0.1)).
diffs: 
  {abs avg vals[`value] - avg (vals[`value] where vals[`id] <> x)} 
  each vals[`id];
  
changing_mean: first vals[`id] where diffs < 0.1
Median population for each continent
def continent_median_population(countries):
  populations = defaultdict(list)
  for country in countries:
    populations[country['continent']].append(country['population'])
  
  output = []  
  for continent, pops in populations.items():
    pops.sort()
    N = len(pops)
    if N % 2 == 1:
      median = pops[(N - 1) // 2]
    else:
      median = (pops[N // 2 - 1] + pops[N // 2]) / 2
    output.append({
      "continent": continent,
      "population": median
    })
    
  return output
def continent_median_population(countries):
  continents = set([c['continent'] for c in countries])
  populations = {
    continent: [
      c['population'] for c in countries 
      if c['continent'] == continent
    ]
    for continent in continents
  }
  
  def compute_median(pops):
    pops = sorted(pops)
    N = len(pops)
    if N % 2 == 1:
      return pops[(N - 1) // 2]
    else:
      return (pops[N // 2 - 1] + pops[N // 2]) / 2  
   
  return [
    {"continent": continent, "population": compute_median(pops)}
    for continent, pops in populations.items()
  ]
def continent_median_population(countries):
  return (countries
      .groupby('continent')
      .population.median()
      .reset_index())
continent_median_population <- function(countries) {
  countries %>%
    group_by(continent) %>%
    summarize(population = median(population))
}
SELECT continent, AVG(population) as population
FROM
  (SELECT *, 
    row_number() OVER 
      (PARTITION BY continent ORDER BY population) AS rank, 
    count() OVER 
      (PARTITION BY continent) as count
  FROM countries)
WHERE 
  (count % 2 = 1 AND rank = (count + 1) / 2) OR 
  (count % 2 = 0 AND ABS(rank - 0.5 - count / 2) = 0.5)
GROUP BY continent
.decl unique_id(Country:symbol, Id:number)    
unique_id(Country, $) :- countries(_, Country, _).

.decl rank(Continent:symbol, R:number, Population:float)
rank(Continent, R_less + R_eq, Population) :-
  countries(Continent, Country, Population),
  unique_id(Country, Id),
  R_less = count : { 
    countries(Continent, C, P), 
    unique_id(C, Id2),
    P < Population
  },
  R_eq = count : {
    countries(Continent, C, P), 
    unique_id(C, Id2),
    P = Population, 
    Id2 < Id
  }.

continent_median_population(Continent, Median) :-
  countries(Continent, _, _),
  Num_countries = count : countries(Continent, _, _),
  ((Num_countries % 2 = 1, 
    rank(Continent, (Num_countries - 1) / 2, Median));
   (Num_countries % 2 = 0,
    rank(Continent, Num_countries / 2 - 1, P1),
    rank(Continent, Num_countries / 2, P2),
    Median = (P1 + P2) / 2)).     
continent_median_population:
  () xkey select med[population] by continent from countries
Youngest person over 35
def youngest_over_35(people):
  over_35 = []
  for person in people:
    if person['age'] > 35:
      over_35.append(person)
    
  youngest = None
  for person in over_35:
    if youngest is None or person['age'] < youngest['age']:
      youngest = person
  return youngest['name'] if youngest is not None else None
def youngest_over_35(people):
  over_35 = [p for p in people if p['age'] > 35]
  youngest = min(over_35, default=None, key=lambda p: p['age'])
  return youngest['name'] if youngest is not None else None
def youngest_over_35(people):
  over_35 = people[people.age > 35]
  if len(over_35) == 0: 
    return None
  else:
    youngest = over_35.loc[over_35.age.idxmin()]
    return youngest['name']
youngest_over_35 <- function(people) {
  people %>%
    filter(age > 35) %>%
    slice(which.min(age)) %>%
    pull(name)
}
SELECT name
FROM people
WHERE age = (
  SELECT MIN(age)
  FROM people
  WHERE age > 35)
LIMIT 1
youngest_over_35(Name) :- 
  people(Age, Name), 
  Age = min Age : { people(Age, _), Age > 35 }.
old_enough: select from people where age > 35
youngest_over_35: 
  (first select name from old_enough where age = min(age)) `name
JoinsDirectors of movies Tom Hanks starred in

def tom_hanks(actors,directors):
  r_directors = set()
  for a in actors:
    if 'Tom Hanks' == a['actor']:
      for d in directors:
        if a['movie'] == d['movie']:
          r_directors.add(d['director'])
    return r_directors

def tom_hanks(actors,directors):
  movies = [a['movie'] for a in actors if a['actor'] == 'Tom Hanks']
  directors = [d['director'] for d in directors if d['movie'] in movies]
  return directors

def tom_hanks(actors,directors):
  movies = list(actors.loc[
    actors['actor'] == 'Tom Hanks', 'movie'])
  return directors[directors['movie'].
    isin(movies)]['director']
tom_hanks <- function(actors, directors) {
  actors %>%
    filter(actor == "Tom Hanks") %>%
    inner_join(directors, by = "movie") %>%
    pull(director)
}

SELECT directors.director
FROM 
  directors 
  INNER JOIN actors
  ON directors.movie = 
  actors.movie
WHERE actors.actor="Tom Hanks"

tom_hanks(Director):-
  actors("Tom Hanks",movie),
  directors(Director,movie).
tom_hanks:
  (select director from actors ij (`movie xkey directors) 
   where actor ~\: "Tom Hanks") `director
Row per family to row per child

def row_per_child(families):
  children = []
  for family in families:
    for i in [1, 2, 3]:
      children.append({
        'family': family['family'],
        'child': f'child{i}',
        'dob': family[f'dob_child{i}'],
        'height': family[f'height_child{i}']
      })
  return children
def row_per_child(families):
  return [
    {'family': family['family'],
     'child': f'child{i}',
     'dob': family[f'dob_child{i}'],
     'height': family[f'height_child{i}']}
    for family in families    
    for i in [1, 2, 3]
  ]
def row_per_child(families):
  df = pd.wide_to_long(
    families, 
    stubnames=['dob', 'height'], 
    sep="_child", 
    i='family', 
    j='child').reset_index()
  df.child = df.child.map(lambda c: f'child{c}')
  return df
row_per_child <- function(families) {
  families %>%
    pivot_longer(
      !family,
      names_to = c(".value", "child"),
      names_sep = "_",
    )
}
SELECT 
  family,
  ('child' || child) AS child,
  (CASE child 
    WHEN 1 THEN dob_child1 
    WHEN 2 THEN dob_child2 
    WHEN 3 THEN dob_child3 
   END) AS dob,
  (CASE child 
    WHEN 1 THEN height_child1 
    WHEN 2 THEN height_child2 
    WHEN 3 THEN height_child3 
   END) AS height
FROM 
  families
  CROSS JOIN
  (SELECT 1 as child UNION VALUES (2), (3))
row_per_child("child1", dob, family, height) :-
  families(dob, _, _, family, height, _, _).
row_per_child("child2", dob, family, height) :-
  families(_, dob, _, family, _, height, _).
row_per_child("child3", dob, family, height) :-
  families(_, _, dob, family, _, _, height).
child_rows: {[child] 
  rows: ?[families; (); 0b; `family`dob`height ! 
    (`family; `$("dob_child",child); `$("height_child", child))];
  update child: (count families)#enlist ("child", child) from rows}
      
row_per_child: (child_rows each ("1"; "2"; "3")) ,/;
StringsConvert strings with different formats to numbers
def strings_to_numbers(numbers):
  output = []
  for row in numbers:
    if row["format"] == 'comma_sep':
      sep = ","
    else:
      sep = "_"
    output.append(int(row["value"].replace(sep, "")))
  return output
def strings_to_numbers(numbers):
  return [
    int(row["value"].replace(
      "," if row["format"] == "comma_sep" else "_", ""))
    for row in numbers
  ]
def strings_to_numbers(numbers):
  def convert(row):
    sep = "," if row.format == 'comma_sep' else "_"
    return int(row.value.replace(sep, ""))
  return numbers.apply(convert, axis=1).tolist()
strings_to_numbers <- function(numbers) {
  numbers %>%
    mutate(output = as.numeric(str_replace_all(
      value, ifelse(format == "comma_sep", ",", "_"), ""))) %>%
    pull(output)
}
SELECT 
  CAST(
    REPLACE(
      value, 
      CASE format 
        WHEN "comma_sep" THEN "," 
        WHEN "under_sep" THEN "_" 
      END, "")
    AS integer)
FROM numbers
.decl clean(Format:symbol, Inp:symbol, I:number, Outp:symbol) 
clean(Format, Inp, 0, "") :- numbers(Format, Inp).
clean(Format, Inp, I+1, Outp) :-
  clean(Format, Inp, I, Outp_rec),
  I <= strlen(Inp),
  Chr = substr(Inp, I, 1),
  ((Format = "comma_sep", Sep = ",");
   (Format = "under_sep", Sep = "_")),
  ((Chr  = Sep, Outp = Outp_rec);
   (Chr != Sep, Outp = cat(Outp_rec, Chr))).

strings_to_numbers(N) :-
  clean(Format, Inp, strlen(Inp), Outp),
  N = to_number(Outp).
convert: {[val; format] 
  sep: (("comma_sep"; "under_sep") ! (","; "_")) format;
  "J" $ ssr[val; sep; ""]}
  
strings_to_numbers:
  convert'[numbers[`value]; numbers[`format]]
Documents with infrequent words
def documents_with_infrequent_words(documents):
  words = {}
  freq = defaultdict(int)
  for doc in documents:
    words[doc["id"]] = doc["text"].split(" ")
    for word in words[doc["id"]]:
      freq[word] += 1
      
  infrequent_words = set()
  for word, count in freq.items():
    if count == 1:
      infrequent_words.add(word)
      
  infrequent_docs = []
  for doc in documents:
    for word in words[doc["id"]]:
      if word in infrequent_words:
        infrequent_docs.append(doc["id"])
        break
        
  return infrequent_docs
def documents_with_infrequent_words(documents):
  words = [doc["text"].split(" ") for doc in documents]
  words_flat = [w for ws in words for w in ws]
  freq = {
    word: words_flat.count(word) 
    for word in set(words_flat)
  }
  infrequent_words = set([
    word for word, count in freq.items() 
    if count == 1
  ])
  infrequent_docs = [
    documents[i]["id"] for i, ws in enumerate(words) 
    if len(set(ws) & infrequent_words) > 0
  ]
  return infrequent_docs
def documents_with_infrequent_words(documents):
  words = documents.text.str.split(" ", expand=True)
  freq = words.stack().value_counts()
  infrequent_words = freq[freq == 1].index.values
  infrequent_docs = documents[
    np.isin(words.values, infrequent_words)]
  return infrequent_docs.id.unique().tolist()
documents_with_infrequent_words <- function(documents) {
  split <- documents %>%
    mutate(word = str_split(text, " ")) %>%
    unnest()
  freq <- split %>% count(word)
  unique_words <- freq %>% filter(n == 1)
  split %>% 
    filter(word %in% unique_words$word) %>%
    pull(id) %>%
    unique()
}
-- NOTE: SQLite tokenize is case-insensitive by default, 
-- so this solution is NOT exactly like the others

CREATE VIRTUAL TABLE doc_index USING fts4(
  text, id, content=documents, tokenize=simple);    
INSERT INTO doc_index(doc_index) VALUES('rebuild');
CREATE VIRTUAL TABLE words USING fts4aux(doc_index);    

SELECT DISTINCT id
FROM 
  documents
  CROSS JOIN
  (SELECT DISTINCT term
   FROM words
   WHERE occurrences = 1) unique_words
WHERE
  (LOWER(text) LIKE '% ' || term || ' %') OR
  (LOWER(text) LIKE term || ' %') OR
  (LOWER(text) LIKE '% ' || term) OR
  (LOWER(text) LIKE term)
.decl substrs(Text:symbol, Idx:number, Len:number)
substrs(Text, 0, 1) :- 
  documents(_, Text), strlen(Text) > 0.
substrs(Text, 0, Len+1) :- 
  substrs(Text, 0, Len), Len + 1 <= strlen(Text).
substrs(Text, Idx+1, Len) :- 
  substrs(Text, Idx, Len), Idx + Len + 1 <= strlen(Text).

.decl token(Docid:number, Text:symbol, Idx:number, Word:symbol)
token(Docid, Text, Idx, Word) :-
  documents(Docid, Text),
  substrs(Text, Idx, Len),
  Prev = Idx - 1, Next = Idx + Len,
  (Prev < 0; " " = substr(Text, Prev, 1)),
  (Next = strlen(Text); " " = substr(Text, Next, 1)),
  Word = substr(Text, Idx, Len),
  !contains(" ", Word).

documents_with_infrequent_words(Id) :-
  documents(Id, _),
  token(Id, _, _, Word),
  1 = count : token(_, _, _, Word).
words: (" " vs) each documents[`text];
freq: count each group raze words;
uniq: where[freq=1];
documents_with_infrequent_words:
  (select id from documents where '[any; in\: [;uniq]] each words) `id
Filter and clean tweets
def process_tweets(data):
  result = []
  for value in data:
    if (value["language"] == "en" and
        value["is_retweet"] == "false"):
      result.append({
        "body": value["body"].lower(),
        "ts": value["ts"]
      })
  return result
def process_tweets(data):
  return [
    {"body": value["body"].lower(),
     "ts": value["ts"]}
    for value in data
    if value["language"] == "en" and
       value["is_retweet"] == "false" 
  ]
def process_tweets(data):
  result = data[
    (data.language == 'en') &
    (data.is_retweet == 'false')]
  result.body = result.body.apply(lambda s: s.lower())
  return result[["body", "ts"]]
process_tweets <- function(data) {
  data %>%
    filter(language == "en" & is_retweet == "false") %>%
    mutate(body = tolower(body)) %>%
    select(ts, body)
}
SELECT LOWER(body) as body, ts
FROM data
WHERE language = "en" and is_retweet = "false"
process_tweets:
  select lower[body], ts from data 
  where (is_retweet ~\: "false") and (language ~\: "en")
First-order logicPeople that purchased all possible items
def purchased_all_food(food, orders):
  n_food = len(food)
  unique_orders = defaultdict(set)
  for order in orders:
    unique_orders[order["buyer"]].add(order["food"])
      
  buyers = []
  for buyer, orders in unique_orders.items():
    if len(orders) == n_food:
      buyers.append(buyer)
  return buyers
def purchased_all_food(food, orders):
    n_food = len(food)
    buyers = set([order["buyer"] for order in orders])
    return [
        buyer
        for buyer in buyers
        if len(set([order["food"] for order in orders 
             if order["buyer"] == buyer])) == n_food
    ]
def purchased_all_food(food, orders):
  n_food = len(food)
  n_unique_orders = (orders
    .groupby('buyer')
    .food.unique() 
    .map(len))
  return n_unique_orders[n_unique_orders == n_food].index.values
purchased_all_food <- function(food, orders) {
  n_food <- count(food)    
  orders %>%
    group_by(buyer) %>%
    distinct(food) %>%
    count() %>%
    filter(n == n_food) %>%
    pull(buyer)
}
SELECT DISTINCT buyer
FROM orders o1
WHERE 
  (SELECT COUNT(DISTINCT food) FROM orders o2 
   WHERE o1.buyer = o2.buyer) = 
  (SELECT COUNT(*) FROM food)
.decl has_purchased(Buyer:symbol, Food:number)
has_purchased(Buyer, Food) :- orders(Buyer, Food, _).

purchased_all_food(Buyer) :-
  orders(Buyer, _, _),
  N_food = count : food(_, _),
  N_unique_orders = count : has_purchased(Buyer, _),
  N_food = N_unique_orders.
buyers: `buyer xgroup orders;
total: count food;
purchased_all_food:
  (where {(count distinct x[`food]) = total} each buyers) `buyer
People who like a unique set of beer
def unique_beer_drinkers(likes):
  likes_per_person = defaultdict(set)
  for row in likes:
    likes_per_person[row['name']].add(row['beer'])
    
  unique = []
  for p1, p1_likes in likes_per_person.items():
    is_unique = True
    for p2, p2_likes in likes_per_person.items():
      if p1 == p2:
        continue
        
      if p1_likes == p2_likes:
        is_unique = False
        break
    if is_unique:
      unique.append(p1)
      
  return unique
def unique_beer_drinkers(likes):
  people = set([row['name'] for row in likes])
  likes_per_person = {
    name: set([row['beer'] for row in likes if row['name'] == name])
    for name in people
  }
    
  return [
    name
    for name, beers in likes_per_person.items()
    if not any([
      other_name != name and beers == other_beers
      for other_name, other_beers in likes_per_person.items()
    ])
  ]
def unique_beer_drinkers(likes):
  likes_per_person = (likes
    .groupby('name')
    .beer.unique().map(set)
    .reset_index())

  def check_not_exists(row):
    other_people = likes_per_person[
      likes_per_person.name != row['name']]
    return not (other_people.beer == row['beer']).any()
  
  unique_drinkers = likes_per_person[
    likes_per_person.apply(check_not_exists, axis=1)]
  return unique_drinkers.name.tolist()
unique_beer_drinkers <- function(likes) {
  likes %>%
    group_by(name) %>%
    summarize(beer_set = list(sort(unique(beer)))) %>%
    add_count(beer_set, name = 'num_people_likes') %>%
    filter(num_people_likes == 1) %>%
    pull(name)
}
SELECT DISTINCT L1.name
FROM likes L1
WHERE NOT EXISTS(
    SELECT *
    FROM likes L2
    WHERE L1.name != L2.name
    AND NOT EXISTS(
        SELECT *
        FROM likes L3
        WHERE L3.name = L2.name
        AND NOT EXISTS(
            SELECT *
            FROM likes L4
            WHERE L4.name = L1.name
            AND L4.beer = L3.beer))
    AND NOT EXISTS(
        SELECT *
        FROM likes L5
        WHERE L5.name = L1.name
        AND NOT EXISTS(
            SELECT *
            FROM likes L6
            WHERE L6.name = L2.name
            AND L6.beer= L5.beer)))
.decl differ(a:symbol, b:symbol)
differ(A, B) :- likes(Beer, A), likes(_, B), !likes(Beer, B).
differ(A, B) :- likes(_, A), likes(Beer, B), !likes(Beer, A).

.decl exists_same(a:symbol)
exists_same(Name) :- 
  likes(_, Other), 
  likes(_, Name), 
  Name != Other, 
  !differ(Name, Other).

unique_beer_drinkers(Name) :- 
  likes(_, Name), 
  !exists_same(Name).
likes_per_person: `name xgroup likes;
counts: count each group likes_per_person;
unique_beer_drinkers: 
  (select name from likes_per_person where beer in\: where[counts=1])
  `name
Time SeriesRolling average
def rolling_average(data):
  data.sort(key=lambda v: v["time"])
  result = []
  for i, value in enumerate(data):
    end = value["time"]
    total, count = 0.0, 0
    for j in range(i, -1, -1):
      if data[j]["time"] <= end - 7:
        break
      total += data[j]["x"]; count += 1
    result.append(
      {"end_time": end,
       "average": total / count}
    )
  return result
def rolling_average(data):
  return [
    {"end_time": x["time"],
     "average": sum(vs) / len(vs)}
    for x in data
    for vs in [
      [y["x"] for y in data
              if y["time"] <= x["time"] and
                 y["time"] > x["time"] - 7]
    ]
  ]
def rolling_average(data):
  d = data.copy()

  data.time = pd.to_datetime(data.time * 10**9)
  data = (data.sort_values('time').set_index('time')
              .rolling(window='7s').mean())

  return pd.DataFrame.from_dict(
    {'end_time': d.sort_values('time').reset_index().time,
     'average': data.reset_index().x}
  )
library(slider)
rolling_average <- function(data) {
  data <- arrange(data, time)
  avgs <- unlist(slide_index(data$x, data$time, ~ mean(.x), .before = 6))
  data %>% mutate(end_time = time, average = avgs) %>% select(end_time, average)
}
SELECT
end.time as end_time,  AVG(other.x) as average
FROM data as end
JOIN data as other
ON other.time <= end.time and
   other.time > end.time - 7
GROUP BY end.time
.decl window(end_time: number, time: number)
window(end_time, t) :-
  data(end_time, _), data(t, _),
  t <= end_time, t > end_time - 7.
.decl bucket(end_time: number, total: float, n: float)
bucket(end_time, total, n) :- data(end_time, _),
  total = sum v : {data(t, v), window(end_time, t)},
      n = sum z : {data(t, _), window(end_time, t), z = 1.0}.
rolling_average(end_time, v) :-
  bucket(end_time, total, n), v = total / n.
get_avg: 
  {[t] (select avg(x) from data where time within (t - 6; t)) `x};
rolling_average: 
  select end_time: time, average: get_avg'[time] from data
Windowed average
def average_window(data):
  def window(value):
    return math.floor(value["time"] / 7)
  if len(data) < 1:
    return []
  data.sort(key=lambda v: v["time"])
  result = []
  current_window = window(data[0])
  total, count = data[0]["x"], 1
  for value in data[1:]:
    time_window = window(value)
    if time_window != current_window:
      result.append(total / count)
      current_window, total, count = time_window, 0, 0
    total += value["x"]
    count += 1
  result.append(total / count)
  return result
def average_window(data):
  def window(value):
    return math.floor(value["time"] / 7)
  grouped_values = [
    [point["x"] for point in data if window(point) == w]
    for w in set(map(window, data))
  ]
  return [
    sum(values) / len(values)
    for values in grouped_values
  ]
def average_window(data):
  def window(t):
    return math.floor(t / 7)
  result = (data.sort_values("time")
                .set_index("time").groupby(window).mean())
  return result['x'].tolist()
average_window <- function(data) {
  data %>% 
    group_by(floor(time / 7)) %>% 
    summarize(avg = mean(x)) %>% 
    pull(avg)
}
SELECT AVG(x) as x
FROM data
GROUP BY cast(time / 7 as int)
ORDER BY time
.decl window(w: number)
window(t/7) :- data(t, _).
.decl windowed(w: number, x: float)
windowed(t/7, x) :- data(t, x).
.decl windowed_total(w: number, x: float)
windowed_total(w, total / n) :- window(w),
  total = sum x : { windowed(w, x) },
      n = sum z : { windowed(w, x), z=1.0 }.
average_window(v) :- windowed_total(_, v).
average_window:
  (value select[<time] avg x by 7 xbar time from data) `x
GraphsReflexive-transitive closure
def reachable(graph, query):
  adjacency_list = defaultdict(list)
  for edge in graph:
    adjacency_list[edge["source"]].append(edge["target"])
  source = query[0]["source"]

  visited = set()
  to_visit = set([source])
    
  while len(to_visit) > 0:
    current = to_visit.pop()
    if current in visited:
      continue
    for neighbor in adjacency_list[current]:
      to_visit.add(neighbor)
    visited.add(current)
            
  return list(visited)
def reachable(graph, query):
  def step(visited):
    frontier = set([
      edge["target"]
      for vertex in visited
      for edge in graph
      if vertex == edge["source"]
    ])
    return frontier.union(visited)

  def fix(f, x):
    next = f(x)
    return x if next == x else fix(f, next)

  source = query[0]["source"]
  return list(fix(step, set([source])))
WITH RECURSIVE
closure(source, target) AS (
  SELECT source, source FROM graph
  UNION
  SELECT source, target FROM graph
  UNION
  SELECT edge.source, path.target
  FROM closure as path JOIN graph as edge
  ON edge.target = path.source
)
SELECT S.target FROM closure as S
JOIN query ON S.source = query.source
.decl path(x: symbol, y: symbol)
path(x, y) :- graph(x, y).
path(x, y) :- graph(x, z), path(z, y).
reachable(source) :- query(source).
reachable(x) :- query(source), path(source, x).
graph: (first') each graph;
nodes: asc distinct graph[`source], graph[`target];
adj_list: `source xgroup graph;
adj_matrix: 
  {in[;x] each nodes} each nodes ,' (adj_list each nodes) `target;
iterated_matrix: last ({x | any each (x *\: x)} \) adj_matrix;
query_idx: nodes ? (first first query[`source]);
reachable: nodes where iterated_matrix[query_idx]
Strongly-connected components
def scc(graph, query):
  reachable = set()
  vertices = set()
  for edge in graph:
    reachable.add((edge["source"], edge["target"]))
    vertices.add(edge["source"])
    vertices.add(edge["target"])
  changed = True
  while changed:
    changed = False
    for edge in graph:
      for vertex in vertices:
        if ((edge["source"], vertex) not in reachable
            and (edge["target"], vertex) in reachable):
          changed = True
          reachable.add((edge["source"], vertex))
  source = query[0]["source"]
  result = []
  for vertex in vertices:
    if ((source, vertex) in reachable and
        (vertex, source) in reachable):
      result.append(vertex)
  return result
def scc(graph, query):
    def step(relation):
        return set([
            (source, edge["target"])
            for (source, target) in relation
            for edge in graph
            if target == edge["source"]
        ]).union(relation)

    def fix(f, x):
        next = f(x)
        return x if next == x else fix(f, next)

    source = query[0]["source"]
    init = set([(edge["source"], edge["target"]) for edge in graph])
    reachable = fix(step, init)
    return list(set([v for (_, v) in reachable
      if (v, source) in reachable and (source, v) in reachable]))
WITH RECURSIVE
closure(source, target) AS (
  SELECT DISTINCT source, target FROM graph
  UNION
  SELECT edge.source, path.target
  FROM closure as path JOIN graph as edge
  ON edge.target = path.source
),
component(v1, v2) AS (
  SELECT path.source, path.target
  FROM closure as path JOIN closure as path_
  ON path.source = path_.target AND path.target = path_.source
)
SELECT S.v2 FROM component as S
JOIN query ON S.v1 = query.source
.decl scc1(x: symbol, y: symbol)
scc1(x, x) :- vertex(x).
scc1(x, y) :- path(x, y), path(y, x).
.decl path(x: symbol, y: symbol)
path(x, y) :- graph(x, y).
path(x,y) :- graph(x, z), path(z, y).
.decl vertex(x: symbol)
vertex(x)  :- graph(x, _).
vertex(x)  :- graph(_, x).
scc(x) :- query(src), scc1(src, x).