Specification: For each continent, return its name and the median population of its countries.
Example input/output:
Input:
countries
name | population | continent |
---|---|---|
USA | 328 | North America |
USA2 | 37 | North America |
Canada | 37 | North America |
Ethiopia | 109 | Africa |
Output:
continent population North America
37
Africa
109
Python - Imperative
def continent_median_population(countries): populations = defaultdict(list) for country in countries: populations[country['continent']].append(country['population']) output = [] for continent, pops in populations.items(): pops.sort() N = len(pops) if N % 2 == 1: median = pops[(N - 1) // 2] else: median = (pops[N // 2 - 1] + pops[N // 2]) / 2 output.append({ "continent": continent, "population": median }) return output
Python - Functional
def continent_median_population(countries): continents = set([c['continent'] for c in countries]) populations = { continent: [ c['population'] for c in countries if c['continent'] == continent ] for continent in continents } def compute_median(pops): pops = sorted(pops) N = len(pops) if N % 2 == 1: return pops[(N - 1) // 2] else: return (pops[N // 2 - 1] + pops[N // 2]) / 2 return [ {"continent": continent, "population": compute_median(pops)} for continent, pops in populations.items() ]
Python - Pandas
def continent_median_population(countries): return (countries .groupby('continent') .population.median() .reset_index())
R - Tidyverse
continent_median_population <- function(countries) { countries %>% group_by(continent) %>% summarize(population = median(population)) }
SQL - SQLite
SELECT continent, AVG(population) as population FROM (SELECT *, row_number() OVER (PARTITION BY continent ORDER BY population) AS rank, count() OVER (PARTITION BY continent) as count FROM countries) WHERE (count % 2 = 1 AND rank = (count + 1) / 2) OR (count % 2 = 0 AND ABS(rank - 0.5 - count / 2) = 0.5) GROUP BY continent
Datalog - Souffle
.decl unique_id(Country:symbol, Id:number) unique_id(Country, $) :- countries(_, Country, _). .decl rank(Continent:symbol, R:number, Population:float) rank(Continent, R_less + R_eq, Population) :- countries(Continent, Country, Population), unique_id(Country, Id), R_less = count : { countries(Continent, C, P), unique_id(C, Id2), P < Population }, R_eq = count : { countries(Continent, C, P), unique_id(C, Id2), P = Population, Id2 < Id }. continent_median_population(Continent, Median) :- countries(Continent, _, _), Num_countries = count : countries(Continent, _, _), ((Num_countries % 2 = 1, rank(Continent, (Num_countries - 1) / 2, Median)); (Num_countries % 2 = 0, rank(Continent, Num_countries / 2 - 1, P1), rank(Continent, Num_countries / 2, P2), Median = (P1 + P2) / 2)).
Q - kdb+
continent_median_population: () xkey select med[population] by continent from countries