This benchmark is a work-in-progress dataset of programs implementing tabular data analytics tasks, similar to Rosetta Code or 7GUIs. This benchmark has two goals:
| Category | Task name | Python - Imperative | Python - Functional | Python - Pandas | R - Tidyverse | SQL - SQLite | Datalog - Souffle | Q - kdb+ |
|---|---|---|---|---|---|---|---|---|
| Aggregation | Continent with the highest average population | def continent_by_population(countries):
continent_stats = defaultdict(lambda: [0, 0])
for country in countries:
continent = country['continent']
continent_stats[continent][0] += country['population']
continent_stats[continent][1] += 1
max_continent = None
max_average = None
for continent, [total, count] in continent_stats.items():
average = total / count
if max_average is None or max_average < average:
max_average = average
max_continent = continent
return max_continent | def continent_by_population(countries):
continents = set([c['continent'] for c in countries])
populations_by_continent = [
(continent, [c['population'] for c in countries
if c['continent'] == continent])
for continent in continents
]
averages = [
(continent, sum(pops) / len(pops))
for continent, pops in populations_by_continent
]
return max(averages, key=lambda t: t[1])[0] | def continent_by_population(countries):
mean_pop = countries.groupby('continent').population.mean()
return mean_pop.index[mean_pop.argmax()] | continent_by_population <- function(countries) {
countries %>%
group_by(continent) %>%
summarize(mean_pop = mean(population)) %>%
slice(which.max(mean_pop)) %>%
.$continent
} | SELECT continent FROM countries GROUP BY continent ORDER BY AVG(population) DESC LIMIT 1 | .decl average_population(Continent:symbol, Avg:number)
average_population(Continent, Avg) :-
countries(Continent, _, _),
Total = sum P : countries(Continent, _, P),
Num_countries = count : countries(Continent, _, _),
Avg = Total / Num_countries.
continent_by_population(Continent) :-
countries(Continent, _, _),
average_population(Continent, Max_avg),
Max_avg = max A : { countries(C, _, _), average_population(C, A) }. | averages: select avg(population) by continent from countries; continent_by_population: (first select[>population] continent from averages) `continent |
| First element that doesn't change mean |
def get_mean(ls):
sum = 0
for e in ls:
sum += e['value']
return sum/len(ls)
def changing_mean(vals):
start_mean = get_mean(vals)
for i,elem in enumerate(vals):
new_ls = vals[:i] + vals[i+1:]
mean = get_mean(new_ls)
if abs(mean - start_mean) < 0.1:
return i
return None
|
def changing_mean(vals):
start_mean = sum([l['value'] for l in vals])/len(vals)
diff = lambda m, sm: abs(m - sm)
for i,elem in enumerate(vals):
new_ls = [x['value'] for x in vals if x != elem]
mean = sum(new_ls)/len(new_ls)
if diff(mean,start_mean) < 0.1:
return i
return None
| def changing_mean(vals): mean = vals.value.mean() mean_without = vals.apply(lambda row: vals[vals.id != row.id].value.mean(), axis=1) return vals[(mean_without - mean).abs() < 0.1].id.tolist() | library(data.table)
changing_mean <- function(vals) {
global_mean <- mean(vals$value)
setDT(vals)[,
mean_change := abs(global_mean - mean(vals[id != .BY, value])),
by = id] %>%
filter(mean_change < 0.1) %>%
slice(which.min(id)) %>%
pull(id)
} | SELECT id FROM vals v1 WHERE ABS(( SELECT AVG(value) FROM vals v2 WHERE v1.id != v2.id ) - (SELECT AVG(value) FROM vals)) < 0.1 | .decl avg_except(I:number, Avg:float)
avg_except(I, Avg) :-
vals(I, _),
N = sum 1.0 : { vals(J, _), I != J },
Total = sum V : { vals(J, V), I != J },
Avg = Total / N.
changing_mean(I) :-
vals(I, _),
N = sum 1.0 : vals(_, _),
Total = sum V : vals(_, V),
Avg = Total / N,
avg_except(I, AvgExcept),
((Avg > AvgExcept, Avg - AvgExcept < 0.1);
(Avg < AvgExcept, AvgExcept - Avg < 0.1)). | diffs:
{abs avg vals[`value] - avg (vals[`value] where vals[`id] <> x)}
each vals[`id];
changing_mean: first vals[`id] where diffs < 0.1 | |
| Median population for each continent | def continent_median_population(countries):
populations = defaultdict(list)
for country in countries:
populations[country['continent']].append(country['population'])
output = []
for continent, pops in populations.items():
pops.sort()
N = len(pops)
if N % 2 == 1:
median = pops[(N - 1) // 2]
else:
median = (pops[N // 2 - 1] + pops[N // 2]) / 2
output.append({
"continent": continent,
"population": median
})
return output | def continent_median_population(countries):
continents = set([c['continent'] for c in countries])
populations = {
continent: [
c['population'] for c in countries
if c['continent'] == continent
]
for continent in continents
}
def compute_median(pops):
pops = sorted(pops)
N = len(pops)
if N % 2 == 1:
return pops[(N - 1) // 2]
else:
return (pops[N // 2 - 1] + pops[N // 2]) / 2
return [
{"continent": continent, "population": compute_median(pops)}
for continent, pops in populations.items()
] | def continent_median_population(countries):
return (countries
.groupby('continent')
.population.median()
.reset_index()) | continent_median_population <- function(countries) {
countries %>%
group_by(continent) %>%
summarize(population = median(population))
} | SELECT continent, AVG(population) as population
FROM
(SELECT *,
row_number() OVER
(PARTITION BY continent ORDER BY population) AS rank,
count() OVER
(PARTITION BY continent) as count
FROM countries)
WHERE
(count % 2 = 1 AND rank = (count + 1) / 2) OR
(count % 2 = 0 AND ABS(rank - 0.5 - count / 2) = 0.5)
GROUP BY continent | .decl unique_id(Country:symbol, Id:number)
unique_id(Country, $) :- countries(_, Country, _).
.decl rank(Continent:symbol, R:number, Population:float)
rank(Continent, R_less + R_eq, Population) :-
countries(Continent, Country, Population),
unique_id(Country, Id),
R_less = count : {
countries(Continent, C, P),
unique_id(C, Id2),
P < Population
},
R_eq = count : {
countries(Continent, C, P),
unique_id(C, Id2),
P = Population,
Id2 < Id
}.
continent_median_population(Continent, Median) :-
countries(Continent, _, _),
Num_countries = count : countries(Continent, _, _),
((Num_countries % 2 = 1,
rank(Continent, (Num_countries - 1) / 2, Median));
(Num_countries % 2 = 0,
rank(Continent, Num_countries / 2 - 1, P1),
rank(Continent, Num_countries / 2, P2),
Median = (P1 + P2) / 2)). | continent_median_population: () xkey select med[population] by continent from countries | |
| Youngest person over 35 | def youngest_over_35(people):
over_35 = []
for person in people:
if person['age'] > 35:
over_35.append(person)
youngest = None
for person in over_35:
if youngest is None or person['age'] < youngest['age']:
youngest = person
return youngest['name'] if youngest is not None else None | def youngest_over_35(people): over_35 = [p for p in people if p['age'] > 35] youngest = min(over_35, default=None, key=lambda p: p['age']) return youngest['name'] if youngest is not None else None | def youngest_over_35(people):
over_35 = people[people.age > 35]
if len(over_35) == 0:
return None
else:
youngest = over_35.loc[over_35.age.idxmin()]
return youngest['name'] | youngest_over_35 <- function(people) {
people %>%
filter(age > 35) %>%
slice(which.min(age)) %>%
pull(name)
} | SELECT name FROM people WHERE age = ( SELECT MIN(age) FROM people WHERE age > 35) LIMIT 1 | youngest_over_35(Name) :-
people(Age, Name),
Age = min Age : { people(Age, _), Age > 35 }. | old_enough: select from people where age > 35 youngest_over_35: (first select name from old_enough where age = min(age)) `name | |
| Joins | Directors of movies Tom Hanks starred in |
def tom_hanks(actors,directors):
r_directors = set()
for a in actors:
if 'Tom Hanks' == a['actor']:
for d in directors:
if a['movie'] == d['movie']:
r_directors.add(d['director'])
return r_directors
| def tom_hanks(actors,directors): movies = [a['movie'] for a in actors if a['actor'] == 'Tom Hanks'] directors = [d['director'] for d in directors if d['movie'] in movies] return directors |
def tom_hanks(actors,directors):
movies = list(actors.loc[
actors['actor'] == 'Tom Hanks', 'movie'])
return directors[directors['movie'].
isin(movies)]['director']
| tom_hanks <- function(actors, directors) {
actors %>%
filter(actor == "Tom Hanks") %>%
inner_join(directors, by = "movie") %>%
pull(director)
} | SELECT directors.director FROM directors INNER JOIN actors ON directors.movie = actors.movie WHERE actors.actor="Tom Hanks" |
tom_hanks(Director):-
actors("Tom Hanks",movie),
directors(Director,movie).
| tom_hanks: (select director from actors ij (`movie xkey directors) where actor ~\: "Tom Hanks") `director |
| Row per family to row per child |
def row_per_child(families):
children = []
for family in families:
for i in [1, 2, 3]:
children.append({
'family': family['family'],
'child': f'child{i}',
'dob': family[f'dob_child{i}'],
'height': family[f'height_child{i}']
})
return children
| def row_per_child(families):
return [
{'family': family['family'],
'child': f'child{i}',
'dob': family[f'dob_child{i}'],
'height': family[f'height_child{i}']}
for family in families
for i in [1, 2, 3]
] | def row_per_child(families):
df = pd.wide_to_long(
families,
stubnames=['dob', 'height'],
sep="_child",
i='family',
j='child').reset_index()
df.child = df.child.map(lambda c: f'child{c}')
return df | row_per_child <- function(families) {
families %>%
pivot_longer(
!family,
names_to = c(".value", "child"),
names_sep = "_",
)
} | SELECT
family,
('child' || child) AS child,
(CASE child
WHEN 1 THEN dob_child1
WHEN 2 THEN dob_child2
WHEN 3 THEN dob_child3
END) AS dob,
(CASE child
WHEN 1 THEN height_child1
WHEN 2 THEN height_child2
WHEN 3 THEN height_child3
END) AS height
FROM
families
CROSS JOIN
(SELECT 1 as child UNION VALUES (2), (3)) | row_per_child("child1", dob, family, height) :-
families(dob, _, _, family, height, _, _).
row_per_child("child2", dob, family, height) :-
families(_, dob, _, family, _, height, _).
row_per_child("child3", dob, family, height) :-
families(_, _, dob, family, _, _, height). | child_rows: {[child]
rows: ?[families; (); 0b; `family`dob`height !
(`family; `$("dob_child",child); `$("height_child", child))];
update child: (count families)#enlist ("child", child) from rows}
row_per_child: (child_rows each ("1"; "2"; "3")) ,/; | |
| Strings | Convert strings with different formats to numbers | def strings_to_numbers(numbers):
output = []
for row in numbers:
if row["format"] == 'comma_sep':
sep = ","
else:
sep = "_"
output.append(int(row["value"].replace(sep, "")))
return output | def strings_to_numbers(numbers):
return [
int(row["value"].replace(
"," if row["format"] == "comma_sep" else "_", ""))
for row in numbers
] | def strings_to_numbers(numbers):
def convert(row):
sep = "," if row.format == 'comma_sep' else "_"
return int(row.value.replace(sep, ""))
return numbers.apply(convert, axis=1).tolist() | strings_to_numbers <- function(numbers) {
numbers %>%
mutate(output = as.numeric(str_replace_all(
value, ifelse(format == "comma_sep", ",", "_"), ""))) %>%
pull(output)
} | SELECT
CAST(
REPLACE(
value,
CASE format
WHEN "comma_sep" THEN ","
WHEN "under_sep" THEN "_"
END, "")
AS integer)
FROM numbers | .decl clean(Format:symbol, Inp:symbol, I:number, Outp:symbol) clean(Format, Inp, 0, "") :- numbers(Format, Inp). clean(Format, Inp, I+1, Outp) :- clean(Format, Inp, I, Outp_rec), I <= strlen(Inp), Chr = substr(Inp, I, 1), ((Format = "comma_sep", Sep = ","); (Format = "under_sep", Sep = "_")), ((Chr = Sep, Outp = Outp_rec); (Chr != Sep, Outp = cat(Outp_rec, Chr))). strings_to_numbers(N) :- clean(Format, Inp, strlen(Inp), Outp), N = to_number(Outp). | convert: {[val; format]
sep: (("comma_sep"; "under_sep") ! (","; "_")) format;
"J" $ ssr[val; sep; ""]}
strings_to_numbers:
convert'[numbers[`value]; numbers[`format]] |
| Documents with infrequent words | def documents_with_infrequent_words(documents):
words = {}
freq = defaultdict(int)
for doc in documents:
words[doc["id"]] = doc["text"].split(" ")
for word in words[doc["id"]]:
freq[word] += 1
infrequent_words = set()
for word, count in freq.items():
if count == 1:
infrequent_words.add(word)
infrequent_docs = []
for doc in documents:
for word in words[doc["id"]]:
if word in infrequent_words:
infrequent_docs.append(doc["id"])
break
return infrequent_docs | def documents_with_infrequent_words(documents):
words = [doc["text"].split(" ") for doc in documents]
words_flat = [w for ws in words for w in ws]
freq = {
word: words_flat.count(word)
for word in set(words_flat)
}
infrequent_words = set([
word for word, count in freq.items()
if count == 1
])
infrequent_docs = [
documents[i]["id"] for i, ws in enumerate(words)
if len(set(ws) & infrequent_words) > 0
]
return infrequent_docs | def documents_with_infrequent_words(documents):
words = documents.text.str.split(" ", expand=True)
freq = words.stack().value_counts()
infrequent_words = freq[freq == 1].index.values
infrequent_docs = documents[
np.isin(words.values, infrequent_words)]
return infrequent_docs.id.unique().tolist() | documents_with_infrequent_words <- function(documents) {
split <- documents %>%
mutate(word = str_split(text, " ")) %>%
unnest()
freq <- split %>% count(word)
unique_words <- freq %>% filter(n == 1)
split %>%
filter(word %in% unique_words$word) %>%
pull(id) %>%
unique()
} | -- NOTE: SQLite tokenize is case-insensitive by default,
-- so this solution is NOT exactly like the others
CREATE VIRTUAL TABLE doc_index USING fts4(
text, id, content=documents, tokenize=simple);
INSERT INTO doc_index(doc_index) VALUES('rebuild');
CREATE VIRTUAL TABLE words USING fts4aux(doc_index);
SELECT DISTINCT id
FROM
documents
CROSS JOIN
(SELECT DISTINCT term
FROM words
WHERE occurrences = 1) unique_words
WHERE
(LOWER(text) LIKE '% ' || term || ' %') OR
(LOWER(text) LIKE term || ' %') OR
(LOWER(text) LIKE '% ' || term) OR
(LOWER(text) LIKE term) | .decl substrs(Text:symbol, Idx:number, Len:number)
substrs(Text, 0, 1) :-
documents(_, Text), strlen(Text) > 0.
substrs(Text, 0, Len+1) :-
substrs(Text, 0, Len), Len + 1 <= strlen(Text).
substrs(Text, Idx+1, Len) :-
substrs(Text, Idx, Len), Idx + Len + 1 <= strlen(Text).
.decl token(Docid:number, Text:symbol, Idx:number, Word:symbol)
token(Docid, Text, Idx, Word) :-
documents(Docid, Text),
substrs(Text, Idx, Len),
Prev = Idx - 1, Next = Idx + Len,
(Prev < 0; " " = substr(Text, Prev, 1)),
(Next = strlen(Text); " " = substr(Text, Next, 1)),
Word = substr(Text, Idx, Len),
!contains(" ", Word).
documents_with_infrequent_words(Id) :-
documents(Id, _),
token(Id, _, _, Word),
1 = count : token(_, _, _, Word). | words: (" " vs) each documents[`text];
freq: count each group raze words;
uniq: where[freq=1];
documents_with_infrequent_words:
(select id from documents where '[any; in\: [;uniq]] each words) `id | |
| Filter and clean tweets | def process_tweets(data):
result = []
for value in data:
if (value["language"] == "en" and
value["is_retweet"] == "false"):
result.append({
"body": value["body"].lower(),
"ts": value["ts"]
})
return result | def process_tweets(data):
return [
{"body": value["body"].lower(),
"ts": value["ts"]}
for value in data
if value["language"] == "en" and
value["is_retweet"] == "false"
] | def process_tweets(data):
result = data[
(data.language == 'en') &
(data.is_retweet == 'false')]
result.body = result.body.apply(lambda s: s.lower())
return result[["body", "ts"]] | process_tweets <- function(data) {
data %>%
filter(language == "en" & is_retweet == "false") %>%
mutate(body = tolower(body)) %>%
select(ts, body)
} | SELECT LOWER(body) as body, ts FROM data WHERE language = "en" and is_retweet = "false" | process_tweets: select lower[body], ts from data where (is_retweet ~\: "false") and (language ~\: "en") | ||
| First-order logic | People that purchased all possible items | def purchased_all_food(food, orders):
n_food = len(food)
unique_orders = defaultdict(set)
for order in orders:
unique_orders[order["buyer"]].add(order["food"])
buyers = []
for buyer, orders in unique_orders.items():
if len(orders) == n_food:
buyers.append(buyer)
return buyers | def purchased_all_food(food, orders):
n_food = len(food)
buyers = set([order["buyer"] for order in orders])
return [
buyer
for buyer in buyers
if len(set([order["food"] for order in orders
if order["buyer"] == buyer])) == n_food
] | def purchased_all_food(food, orders):
n_food = len(food)
n_unique_orders = (orders
.groupby('buyer')
.food.unique()
.map(len))
return n_unique_orders[n_unique_orders == n_food].index.values | purchased_all_food <- function(food, orders) {
n_food <- count(food)
orders %>%
group_by(buyer) %>%
distinct(food) %>%
count() %>%
filter(n == n_food) %>%
pull(buyer)
} | SELECT DISTINCT buyer FROM orders o1 WHERE (SELECT COUNT(DISTINCT food) FROM orders o2 WHERE o1.buyer = o2.buyer) = (SELECT COUNT(*) FROM food) | .decl has_purchased(Buyer:symbol, Food:number) has_purchased(Buyer, Food) :- orders(Buyer, Food, _). purchased_all_food(Buyer) :- orders(Buyer, _, _), N_food = count : food(_, _), N_unique_orders = count : has_purchased(Buyer, _), N_food = N_unique_orders. | buyers: `buyer xgroup orders;
total: count food;
purchased_all_food:
(where {(count distinct x[`food]) = total} each buyers) `buyer |
| People who like a unique set of beer | def unique_beer_drinkers(likes):
likes_per_person = defaultdict(set)
for row in likes:
likes_per_person[row['name']].add(row['beer'])
unique = []
for p1, p1_likes in likes_per_person.items():
is_unique = True
for p2, p2_likes in likes_per_person.items():
if p1 == p2:
continue
if p1_likes == p2_likes:
is_unique = False
break
if is_unique:
unique.append(p1)
return unique | def unique_beer_drinkers(likes):
people = set([row['name'] for row in likes])
likes_per_person = {
name: set([row['beer'] for row in likes if row['name'] == name])
for name in people
}
return [
name
for name, beers in likes_per_person.items()
if not any([
other_name != name and beers == other_beers
for other_name, other_beers in likes_per_person.items()
])
] | def unique_beer_drinkers(likes):
likes_per_person = (likes
.groupby('name')
.beer.unique().map(set)
.reset_index())
def check_not_exists(row):
other_people = likes_per_person[
likes_per_person.name != row['name']]
return not (other_people.beer == row['beer']).any()
unique_drinkers = likes_per_person[
likes_per_person.apply(check_not_exists, axis=1)]
return unique_drinkers.name.tolist() | unique_beer_drinkers <- function(likes) {
likes %>%
group_by(name) %>%
summarize(beer_set = list(sort(unique(beer)))) %>%
add_count(beer_set, name = 'num_people_likes') %>%
filter(num_people_likes == 1) %>%
pull(name)
} | SELECT DISTINCT L1.name
FROM likes L1
WHERE NOT EXISTS(
SELECT *
FROM likes L2
WHERE L1.name != L2.name
AND NOT EXISTS(
SELECT *
FROM likes L3
WHERE L3.name = L2.name
AND NOT EXISTS(
SELECT *
FROM likes L4
WHERE L4.name = L1.name
AND L4.beer = L3.beer))
AND NOT EXISTS(
SELECT *
FROM likes L5
WHERE L5.name = L1.name
AND NOT EXISTS(
SELECT *
FROM likes L6
WHERE L6.name = L2.name
AND L6.beer= L5.beer))) | .decl differ(a:symbol, b:symbol) differ(A, B) :- likes(Beer, A), likes(_, B), !likes(Beer, B). differ(A, B) :- likes(_, A), likes(Beer, B), !likes(Beer, A). .decl exists_same(a:symbol) exists_same(Name) :- likes(_, Other), likes(_, Name), Name != Other, !differ(Name, Other). unique_beer_drinkers(Name) :- likes(_, Name), !exists_same(Name). | likes_per_person: `name xgroup likes; counts: count each group likes_per_person; unique_beer_drinkers: (select name from likes_per_person where beer in\: where[counts=1]) `name | |
| Time Series | Rolling average | def rolling_average(data):
data.sort(key=lambda v: v["time"])
result = []
for i, value in enumerate(data):
end = value["time"]
total, count = 0.0, 0
for j in range(i, -1, -1):
if data[j]["time"] <= end - 7:
break
total += data[j]["x"]; count += 1
result.append(
{"end_time": end,
"average": total / count}
)
return result | def rolling_average(data):
return [
{"end_time": x["time"],
"average": sum(vs) / len(vs)}
for x in data
for vs in [
[y["x"] for y in data
if y["time"] <= x["time"] and
y["time"] > x["time"] - 7]
]
] | def rolling_average(data):
d = data.copy()
data.time = pd.to_datetime(data.time * 10**9)
data = (data.sort_values('time').set_index('time')
.rolling(window='7s').mean())
return pd.DataFrame.from_dict(
{'end_time': d.sort_values('time').reset_index().time,
'average': data.reset_index().x}
) | library(slider)
rolling_average <- function(data) {
data <- arrange(data, time)
avgs <- unlist(slide_index(data$x, data$time, ~ mean(.x), .before = 6))
data %>% mutate(end_time = time, average = avgs) %>% select(end_time, average)
} | SELECT end.time as end_time, AVG(other.x) as average FROM data as end JOIN data as other ON other.time <= end.time and other.time > end.time - 7 GROUP BY end.time | .decl window(end_time: number, time: number)
window(end_time, t) :-
data(end_time, _), data(t, _),
t <= end_time, t > end_time - 7.
.decl bucket(end_time: number, total: float, n: float)
bucket(end_time, total, n) :- data(end_time, _),
total = sum v : {data(t, v), window(end_time, t)},
n = sum z : {data(t, _), window(end_time, t), z = 1.0}.
rolling_average(end_time, v) :-
bucket(end_time, total, n), v = total / n. | get_avg:
{[t] (select avg(x) from data where time within (t - 6; t)) `x};
rolling_average:
select end_time: time, average: get_avg'[time] from data |
| Windowed average | def average_window(data):
def window(value):
return math.floor(value["time"] / 7)
if len(data) < 1:
return []
data.sort(key=lambda v: v["time"])
result = []
current_window = window(data[0])
total, count = data[0]["x"], 1
for value in data[1:]:
time_window = window(value)
if time_window != current_window:
result.append(total / count)
current_window, total, count = time_window, 0, 0
total += value["x"]
count += 1
result.append(total / count)
return result | def average_window(data):
def window(value):
return math.floor(value["time"] / 7)
grouped_values = [
[point["x"] for point in data if window(point) == w]
for w in set(map(window, data))
]
return [
sum(values) / len(values)
for values in grouped_values
] | def average_window(data):
def window(t):
return math.floor(t / 7)
result = (data.sort_values("time")
.set_index("time").groupby(window).mean())
return result['x'].tolist() | average_window <- function(data) {
data %>%
group_by(floor(time / 7)) %>%
summarize(avg = mean(x)) %>%
pull(avg)
} | SELECT AVG(x) as x FROM data GROUP BY cast(time / 7 as int) ORDER BY time | .decl window(w: number)
window(t/7) :- data(t, _).
.decl windowed(w: number, x: float)
windowed(t/7, x) :- data(t, x).
.decl windowed_total(w: number, x: float)
windowed_total(w, total / n) :- window(w),
total = sum x : { windowed(w, x) },
n = sum z : { windowed(w, x), z=1.0 }.
average_window(v) :- windowed_total(_, v). | average_window: (value select[<time] avg x by 7 xbar time from data) `x | |
| Graphs | Reflexive-transitive closure | def reachable(graph, query):
adjacency_list = defaultdict(list)
for edge in graph:
adjacency_list[edge["source"]].append(edge["target"])
source = query[0]["source"]
visited = set()
to_visit = set([source])
while len(to_visit) > 0:
current = to_visit.pop()
if current in visited:
continue
for neighbor in adjacency_list[current]:
to_visit.add(neighbor)
visited.add(current)
return list(visited) | def reachable(graph, query):
def step(visited):
frontier = set([
edge["target"]
for vertex in visited
for edge in graph
if vertex == edge["source"]
])
return frontier.union(visited)
def fix(f, x):
next = f(x)
return x if next == x else fix(f, next)
source = query[0]["source"]
return list(fix(step, set([source]))) | WITH RECURSIVE closure(source, target) AS ( SELECT source, source FROM graph UNION SELECT source, target FROM graph UNION SELECT edge.source, path.target FROM closure as path JOIN graph as edge ON edge.target = path.source ) SELECT S.target FROM closure as S JOIN query ON S.source = query.source | .decl path(x: symbol, y: symbol) path(x, y) :- graph(x, y). path(x, y) :- graph(x, z), path(z, y). reachable(source) :- query(source). reachable(x) :- query(source), path(source, x). | graph: (first') each graph;
nodes: asc distinct graph[`source], graph[`target];
adj_list: `source xgroup graph;
adj_matrix:
{in[;x] each nodes} each nodes ,' (adj_list each nodes) `target;
iterated_matrix: last ({x | any each (x *\: x)} \) adj_matrix;
query_idx: nodes ? (first first query[`source]);
reachable: nodes where iterated_matrix[query_idx] | ||
| Strongly-connected components | def scc(graph, query):
reachable = set()
vertices = set()
for edge in graph:
reachable.add((edge["source"], edge["target"]))
vertices.add(edge["source"])
vertices.add(edge["target"])
changed = True
while changed:
changed = False
for edge in graph:
for vertex in vertices:
if ((edge["source"], vertex) not in reachable
and (edge["target"], vertex) in reachable):
changed = True
reachable.add((edge["source"], vertex))
source = query[0]["source"]
result = []
for vertex in vertices:
if ((source, vertex) in reachable and
(vertex, source) in reachable):
result.append(vertex)
return result | def scc(graph, query):
def step(relation):
return set([
(source, edge["target"])
for (source, target) in relation
for edge in graph
if target == edge["source"]
]).union(relation)
def fix(f, x):
next = f(x)
return x if next == x else fix(f, next)
source = query[0]["source"]
init = set([(edge["source"], edge["target"]) for edge in graph])
reachable = fix(step, init)
return list(set([v for (_, v) in reachable
if (v, source) in reachable and (source, v) in reachable])) | WITH RECURSIVE closure(source, target) AS ( SELECT DISTINCT source, target FROM graph UNION SELECT edge.source, path.target FROM closure as path JOIN graph as edge ON edge.target = path.source ), component(v1, v2) AS ( SELECT path.source, path.target FROM closure as path JOIN closure as path_ ON path.source = path_.target AND path.target = path_.source ) SELECT S.v2 FROM component as S JOIN query ON S.v1 = query.source | .decl scc1(x: symbol, y: symbol) scc1(x, x) :- vertex(x). scc1(x, y) :- path(x, y), path(y, x). .decl path(x: symbol, y: symbol) path(x, y) :- graph(x, y). path(x,y) :- graph(x, z), path(z, y). .decl vertex(x: symbol) vertex(x) :- graph(x, _). vertex(x) :- graph(_, x). scc(x) :- query(src), scc1(src, x). |